| | import subprocess |
| | import sys |
| | import os |
| |
|
# Install project dependencies from requirements.txt into the environment of
# the interpreter currently running this script.
requirements_file = "requirements.txt"

if os.path.exists(requirements_file):
    try:
        # sys.executable ensures pip installs into *this* Python, not
        # whichever pip happens to be first on PATH.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", requirements_file])
        print(f"Successfully installed requirements from {requirements_file}")
    except subprocess.CalledProcessError as e:
        # pip ran but exited non-zero (e.g. an unresolvable package).
        print(f"Error installing requirements: {e}")
    except FileNotFoundError:
        print("pip not found. Ensure pip is installed and in your PATH.")
    except Exception as generic_exception:
        # Last-resort guard so a dependency hiccup doesn't kill the script.
        # Bug fix: corrected spelling "occured" -> "occurred".
        print(f"An unexpected error occurred: {generic_exception}")
else:
    print(f"Requirements file not found: {requirements_file}")
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| | import numpy as np |
| | import statsmodels.api as sm |
| | import matplotlib.pyplot as plt |
| | from sklearn.model_selection import train_test_split |
| |
|
| | |
# --- Data loading & train/test split configuration ---
filepath = "data.txt"
test_size = 0.2
random_state = 0

# Comma-separated numeric file; np.loadtxt raises if the file is absent or
# malformed, so execution never continues with partial data.
data = np.loadtxt(filepath, delimiter=',')

# Convention: last column is the target, all preceding columns are features.
X, y = data[:, :-1], data[:, -1]

# Reproducible 80/20 split — random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, random_state=random_state
)

print("Data loaded and split successfully.")
for name, arr in (("X_train", X_train), ("X_test", X_test),
                  ("y_train", y_train), ("y_test", y_test)):
    print(f"{name} shape: {arr.shape}")
| |
|
| |
|
if X is not None:
    num_inputs = X.shape[1]

    # Examine each feature's relationship with the target, one at a time.
    for col in range(num_inputs):
        plt.figure(figsize=(8, 6))
        plt.scatter(X[:, col], y)
        plt.xlabel(f"Input {col+1}")
        plt.ylabel("Target")
        plt.title(f"Scatter Plot: Input {col+1} vs. Target")
        plt.grid(True)
        plt.show()

        # Pearson correlation coefficient between this feature and the target.
        correlation = np.corrcoef(X[:, col], y)[0, 1]
        print(f"Correlation between Input {col+1} and Target: {correlation:.4f}")

        # Rough strength classification by |r| (rule-of-thumb thresholds).
        strength = abs(correlation)
        if strength > 0.7:
            print("Strong relationship detected.")
        elif strength > 0.3:
            print("Moderate relationship detected.")
        else:
            print("Weak or no linear relationship detected.")
        print("-" * 30)
else:
    print("Data loading or splitting failed.")
| |
|
| |
|
| |
|
| | |
# --- Ordinary least squares fit with an intercept (statsmodels) ---
X_train_const = sm.add_constant(X_train)

model = sm.OLS(y_train, X_train_const).fit()

print(model.summary())

# Coefficient p-values; index 0 is the intercept, so skip it to get one
# p-value per original feature.
p_values = model.pvalues[1:]
significant_inputs = np.where(p_values < 0.05)[0]
insignificant_inputs = np.where(p_values >= 0.05)[0]

print("\nSignificant Inputs (indices):", significant_inputs)
print("Insignificant Inputs (indices):", insignificant_inputs)

if len(insignificant_inputs) > 0:
    # Keep the intercept column (0) plus the significant feature columns
    # (shifted by +1 because add_constant prepends the intercept).
    significant_columns = np.concatenate(([0], significant_inputs + 1))
    Xs_train_const = X_train_const[:, significant_columns]
    Xs_test = X_test[:, significant_inputs]
    Xs_test_const = sm.add_constant(Xs_test)

    model_reduced = sm.OLS(y_train, Xs_train_const).fit()
    print("\nModel Summary after dropping insignificant inputs:")
    print(model_reduced.summary())
else:
    # Bug fix: previously Xs_train_const / Xs_test were only defined inside
    # the branch above, so when every input was significant the later
    # sklearn/gradio sections crashed with NameError. With nothing to drop,
    # the "reduced" design is simply the full design.
    Xs_train_const = X_train_const
    Xs_test = X_test
    Xs_test_const = sm.add_constant(X_test)
    print("\nNo insignificant inputs found. No model retraining needed.")

print("\nUnderstanding Insignificant Inputs:")
print("Insignificant inputs, as indicated by their high p-values (typically > 0.05), suggest that they do not have a statistically significant linear relationship with the target variable, given the other inputs in the model. In other words, their coefficients are not reliably different from zero. Removing them can simplify the model and potentially improve its generalization performance by reducing noise. They contribute little to explaining the variance in the target variable.")
| |
|
| |
|
| |
|
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# --- Refit on the significant features with scikit-learn ---
# Xs_train_const carries the statsmodels intercept column at index 0;
# drop it, because LinearRegression fits its own intercept.
model_sklearn = LinearRegression()
model_sklearn.fit(Xs_train_const[:, 1:], y_train)

y_train_pred = model_sklearn.predict(Xs_train_const[:, 1:])

y_test_pred = model_sklearn.predict(Xs_test)

# R^2 on both splits to gauge fit quality and generalization.
r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)

print(f"\nR-squared score on the training set: {r2_train:.4f}")
print(f"R-squared score on the test set: {r2_test:.4f}")

# Diagnose the fit.
# Bug fix: the underfitting branch previously fired whenever
# r2_train < r2_test, even when both scores were excellent — contradicting
# its own message ("performs poorly on both"). It now requires genuinely
# poor performance on both splits (0.3 is a rule-of-thumb threshold).
if r2_train > r2_test and (r2_train - r2_test) > 0.1:
    print("\nOverfitting: The model performs significantly better on the training data than on the test data.")
elif r2_train < 0.3 and r2_test < 0.3:
    print("\nUnderfitting: The model performs poorly on both the training and test data.")
else:
    print("\nGood fit: The model performs well on both the training and test data, indicating good generalization.")
| |
|
| |
|
| |
|
| | |
| | |
import gradio as gr

# One numeric field per feature retained in the reduced model, labelled with
# its original 1-based input index.
input_components = [gr.Number(label=f"Input {i+1}") for i in significant_inputs]


def predict(*inputs):
    """Return the fitted model's prediction for one row of feature values."""
    row = np.array(inputs).reshape(1, -1)
    return model_sklearn.predict(row)[0]


iface = gr.Interface(
    fn=predict,
    inputs=input_components,
    outputs=gr.Number(label="Prediction"),
    title="Linear Regression Prediction App",
    description="Enter the input values to get the predicted target value.",
)
# NOTE(review): share=True publishes a temporary public URL via gradio's
# tunnel service — confirm that exposing the app externally is intended.
iface.launch(share=True)
| |
|
| |
|
| |
|