# Import the libraries
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from statsmodels.stats.diagnostic import het_breuschpagan
from statsmodels.stats.outliers_influence import variance_inflation_factor
import statsmodels.api as sm
# Script body: fit an ordinary linear regression on a CSV dataset, then run
# two diagnostics on it -- a Breusch-Pagan test for heteroscedasticity and
# variance inflation factors (VIF) for collinearity among the predictors.

# NOTE(review): the original column selectors were lost (`data [ ]` / `data`).
# Set these two placeholders to match the dataset before running.
TARGET_COLUMN = "target"  # TODO: name of the dependent-variable column
FEATURE_COLUMNS = None  # TODO: list of predictor names; None = every other column

# Load the data
data = pd.read_csv("tuo_dataset.csv")

# Build the design matrix and the dependent variable
if FEATURE_COLUMNS is None:
    # Assumes every non-target column is a numeric predictor -- confirm.
    X = data.drop(columns=[TARGET_COLUMN])
else:
    X = data[list(FEATURE_COLUMNS)]
y = data[TARGET_COLUMN]

# Split the data into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the held-out data
y_pred = model.predict(X_test)

# Check homoscedasticity with the Breusch-Pagan test.
# het_breuschpagan regresses the squared residuals on `exog_het`, which must
# include a constant column; X_test has none, so add one explicitly.
residuals = y_test - y_pred
exog_het = sm.add_constant(X_test, has_constant="add")
_, p_value, _, _ = het_breuschpagan(residuals, exog_het)
if p_value < 0.05:
    print("Viola l'omoschedasticità")
else:
    print("Omoschedasticità soddisfatta")

# Check collinearity with variance inflation factors.
# The auxiliary regressions need an intercept, so a constant is prepended
# (column 0) and then skipped when reporting the per-feature VIFs.
X_with_const = sm.add_constant(X, has_constant="add").to_numpy(dtype=float)
vif = pd.DataFrame()
vif["feature"] = X.columns
vif["VIF"] = [
    variance_inflation_factor(X_with_const, i)
    for i in range(1, X_with_const.shape[1])
]
print(vif)