Update app.py
Browse files
app.py
CHANGED
|
@@ -39,7 +39,7 @@ def download_dataset():
|
|
| 39 |
# ✅ Download dataset at startup
|
| 40 |
download_dataset()
|
| 41 |
|
| 42 |
-
# ✅ Load dataset with
|
| 43 |
try:
|
| 44 |
df = pd.read_csv(DATASET_PATH, encoding="utf-8", delimiter=",", on_bad_lines="skip")
|
| 45 |
except:
|
|
@@ -54,14 +54,17 @@ for col in required_columns:
|
|
| 54 |
if col not in df.columns:
|
| 55 |
raise Exception(f"β Column '{col}' is missing from the dataset!")
|
| 56 |
|
|
|
|
|
|
|
|
|
|
| 57 |
# ✅ Encode categorical variables
|
| 58 |
encoder = LabelEncoder()
|
| 59 |
-
df["Case Problem"] = encoder.fit_transform(df["Case Problem"])
|
| 60 |
-
df["Feedback"] = encoder.fit_transform(df["Feedback"])
|
| 61 |
|
| 62 |
# ✅ Train Model
|
| 63 |
-
X = df[["Case Problem"]]
|
| 64 |
-
y = df["Feedback"]
|
| 65 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
| 66 |
model = RandomForestClassifier(n_estimators=100, random_state=42)
|
| 67 |
model.fit(X_train, y_train)
|
|
@@ -70,6 +73,9 @@ model.fit(X_train, y_train)
|
|
| 70 |
joblib.dump(model, "feedback_model.pkl")
|
| 71 |
print("β
Model trained successfully!")
|
| 72 |
|
|
|
|
|
|
|
|
|
|
| 73 |
# ✅ API Input Model
|
| 74 |
class PredictionInput(BaseModel):
|
| 75 |
case_problem: str
|
|
@@ -80,26 +86,34 @@ def predict_feedback(data: PredictionInput):
|
|
| 80 |
if model is None:
|
| 81 |
return {"error": "Model is not trained yet."}
|
| 82 |
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
return {"
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
# ✅ Gradio UI
|
| 92 |
def gradio_interface(case_problem):
|
| 93 |
if model is None:
|
| 94 |
return "Model not trained yet."
|
| 95 |
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
return
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
# ✅ Start both API & Gradio
|
| 105 |
def start_app():
|
|
|
|
| 39 |
# ✅ Download dataset at startup
|
| 40 |
download_dataset()
|
| 41 |
|
| 42 |
+
# ✅ Load dataset with error handling
|
| 43 |
try:
|
| 44 |
df = pd.read_csv(DATASET_PATH, encoding="utf-8", delimiter=",", on_bad_lines="skip")
|
| 45 |
except:
|
|
|
|
| 54 |
if col not in df.columns:
|
| 55 |
raise Exception(f"β Column '{col}' is missing from the dataset!")
|
| 56 |
|
| 57 |
+
# ✅ Convert "Case Problem" column to lowercase for consistency
|
| 58 |
+
df["Case Problem"] = df["Case Problem"].str.lower()
|
| 59 |
+
|
| 60 |
# ✅ Encode categorical variables
|
| 61 |
encoder = LabelEncoder()
|
| 62 |
+
df["Case Problem Encoded"] = encoder.fit_transform(df["Case Problem"])
|
| 63 |
+
df["Feedback Encoded"] = encoder.fit_transform(df["Feedback"])
|
| 64 |
|
| 65 |
# ✅ Train Model
|
| 66 |
+
X = df[["Case Problem Encoded"]]
|
| 67 |
+
y = df["Feedback Encoded"]
|
| 68 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
| 69 |
model = RandomForestClassifier(n_estimators=100, random_state=42)
|
| 70 |
model.fit(X_train, y_train)
|
|
|
|
| 73 |
joblib.dump(model, "feedback_model.pkl")
|
| 74 |
print("β
Model trained successfully!")
|
| 75 |
|
| 76 |
+
# ✅ Save encoder classes for future use
|
| 77 |
+
joblib.dump(encoder, "case_problem_encoder.pkl")
|
| 78 |
+
|
| 79 |
# ✅ API Input Model
|
| 80 |
class PredictionInput(BaseModel):
|
| 81 |
case_problem: str
|
|
|
|
| 86 |
if model is None:
|
| 87 |
return {"error": "Model is not trained yet."}
|
| 88 |
|
| 89 |
+
# ✅ Convert input to lowercase to match training data
|
| 90 |
+
case_problem_lower = data.case_problem.lower()
|
| 91 |
+
|
| 92 |
+
if case_problem_lower not in df["Case Problem"].values:
|
| 93 |
+
return {"error": "Invalid case problem. Please enter a valid category from the dataset."}
|
| 94 |
+
|
| 95 |
+
case_problem_encoded = encoder.transform([case_problem_lower])
|
| 96 |
+
prediction = model.predict([[case_problem_encoded[0]]])
|
| 97 |
+
feedback_predicted = encoder.inverse_transform(prediction)[0]
|
| 98 |
+
|
| 99 |
+
return {"Predicted Feedback": feedback_predicted}
|
| 100 |
|
| 101 |
# ✅ Gradio UI
|
| 102 |
def gradio_interface(case_problem):
    """Predict the feedback label for a case problem entered in the Gradio UI.

    Args:
        case_problem: Case-problem text typed by the user. It is lower-cased
            before lookup because the "Case Problem" column is lower-cased
            when the dataset is prepared.

    Returns:
        A message string: the predicted feedback, or an explanation when the
        model is unavailable or the case problem is not in the dataset.
    """
    if model is None:
        return "Model not trained yet."

    # Convert input to lowercase for consistency with the training data.
    case_problem_lower = case_problem.lower()

    # The module-level `encoder` is fitted on "Case Problem" and then fitted
    # again on "Feedback", so by the time this runs it only knows the feedback
    # classes. Calling encoder.transform() on a case problem would raise (or
    # return a feedback code), so read the code from the column that was
    # produced while the encoder still held the case-problem classes.
    matches = df.loc[df["Case Problem"] == case_problem_lower, "Case Problem Encoded"]
    if matches.empty:
        return "Invalid case problem. Please enter a valid category from the dataset."

    prediction = model.predict([[matches.iloc[0]]])
    # `encoder` holds the feedback classes here, so decoding is correct.
    feedback_predicted = encoder.inverse_transform(prediction)[0]

    return f"Predicted Feedback: {feedback_predicted}"
|
| 117 |
|
| 118 |
# ✅ Start both API & Gradio
|
| 119 |
def start_app():
|