Predictor de Precio de Seguro
Completa el formulario para obtener una predicción del precio anual del seguro
Ejemplo de predicción de precios de seguros médicos
Código de Entrenamiento del Modelo (Random Forest)
import os
import sys

import numpy as np
import pandas as pd
from scipy.stats import randint
from sklearn.compose import ColumnTransformer, TransformedTargetRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder, PowerTransformer
# Load the raw dataset. `df_origin` keeps the untouched data as read from
# disk, while `df` is the working copy used below; the original code read
# into `df_origin` but then referenced an undefined `df` (NameError).
df_origin = pd.read_csv("insurance.csv")
df = df_origin.copy()

# Split predictors from the target column ('charges').
X, y = df.drop(columns=['charges']), df['charges']

# Hold out 40% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42
)
# Column groups, one per preprocessing strategy.
numeric_features = ['bmi', 'age', 'children']
ordinal_features = ['smoker']
categorical_features = ['sex', 'region']

# Base regressor; seeded so repeated runs grow the same forest.
model = RandomForestRegressor(random_state=42)

# Feature preprocessing:
#   - numeric columns are forwarded unchanged,
#   - 'smoker' is encoded with the explicit order no -> 0, yes -> 1,
#   - 'sex' and 'region' are one-hot encoded, dropping the first level.
preprocessor = ColumnTransformer(
    transformers=[
        ('numeric', 'passthrough', numeric_features),
        ('ordinal', OrdinalEncoder(categories=[['no', 'yes']]), ordinal_features),
        ('categorical', OneHotEncoder(drop='first'), categorical_features),
    ]
)

# Full pipeline: preprocessing followed by the regressor.
ml_pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('model', model),
    ]
)

# Wrap the pipeline so the target is Box-Cox transformed before fitting and
# predictions are mapped back to the original scale.
# NOTE: Box-Cox is only defined for strictly positive target values.
final_model = TransformedTargetRegressor(
    transformer=PowerTransformer(method='box-cox'),
    regressor=ml_pipeline,
)
# Search space for the random forest. Keys follow the nesting
# TransformedTargetRegressor -> 'regressor' -> pipeline step 'model'.
# randint(low, high) draws integers from [low, high) (upper bound exclusive).
param_distributions = {
    'regressor__model__n_estimators': randint(20, 200),
    'regressor__model__max_depth': [None, *range(5, 30)],  # None = unlimited depth
    'regressor__model__min_samples_split': randint(2, 20),
    'regressor__model__min_samples_leaf': randint(1, 10),
    'regressor__model__max_features': ['sqrt', 'log2', None],
    'regressor__model__bootstrap': [True, False],
}

# Randomized search: 50 sampled configurations, each scored by 5-fold
# cross-validation on negative MSE, using all available cores.
random_search = RandomizedSearchCV(
    final_model,
    param_distributions,
    n_iter=50,  # number of parameter combinations to try
    scoring='neg_mean_squared_error',
    cv=5,
    random_state=42,
    n_jobs=-1,
    verbose=1,
)

# Run the search on the training split only.
random_search.fit(X_train, y_train)
# Best estimator found by the search.
best_model = random_search.best_estimator_

# The search was scored with negative MSE, so the sign is flipped to report
# the cross-validated MSE as a positive number.
print(f"\nMejores parámetros: {random_search.best_params_}")
print(f"Mejor score CV: {-random_search.best_score_:.2f}")

# Same parameters again, one per line for readability.
print("Mejores parámetros encontrados:")
for param, value in random_search.best_params_.items():
    print(f"{param}: {value}")

# Evaluate on the held-out test split.
y_pred = best_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("\nResultados en test set:")
print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R²: {r2:.4f}")
print(f"MAE: {mae:.2f}")
Análisis de Datos del Modelo