File size: 5,521 Bytes
7d07c47 4da6b1d 7d07c47 db86ab6 7d07c47 bb3939d 7d07c47 451a6aa 7d07c47 451a6aa 7d07c47 c45f63b 10b4411 c45f63b bb3939d c45f63b 7d07c47 c45f63b a9baa2a 7d07c47 bb3939d 7d07c47 d5c67c2 7d07c47 db86ab6 7d07c47 c45f63b a9baa2a 10b4411 bb3939d a9baa2a bb3939d a9baa2a bb3939d db86ab6 c45f63b db86ab6 c45f63b db86ab6 d5c67c2 7d07c47 c45f63b 10b4411 bb3939d a9baa2a bb3939d db86ab6 c45f63b db86ab6 c45f63b db86ab6 7d07c47 d5c67c2 7d07c47 87ecbe0 d5c67c2 87ecbe0 a9baa2a 7d07c47 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
from fastapi import FastAPI
import pandas as pd
import uvicorn
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from pydantic import BaseModel
import gradio as gr
import os
import requests
# FastAPI application instance; served by uvicorn inside start_app().
app = FastAPI()

# Hugging Face dataset URL, pinned to a specific revision for reproducibility.
DATASET_URL = "https://huggingface.co/datasets/SailajaS/CDART/resolve/9bfa82e31390ff6523f6b93777f745a78ecb2cd6/Sample_Case_Records__Real_Unique.csv?download=true"

# Local file the downloaded CSV is written to and read back from.
DATASET_PATH = "dataset.csv"
def download_dataset():
    """Download the CSV dataset from Hugging Face and write it to DATASET_PATH.

    Raises:
        RuntimeError: if the HTTP request fails or returns a non-2xx status
            (chained to the underlying requests exception).
    """
    print("Downloading dataset from Hugging Face...")
    try:
        response = requests.get(DATASET_URL, timeout=10)
        # raise_for_status() rejects every non-2xx code, not just != 200,
        # and raises a RequestException so one handler covers both the
        # transport failure and the bad-status cases.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error downloading dataset: {e}")
        # Chain the cause so the original network error is not lost.
        raise RuntimeError("Dataset download failed.") from e
    with open(DATASET_PATH, "wb") as file:
        file.write(response.content)
    print("Dataset downloaded successfully!")
# Download the dataset once at startup so the training code below can read it.
download_dataset()

# Load the dataset, falling back from a comma to a semicolon delimiter.
# Catch only parsing/decoding/IO errors instead of a bare `except:`, which
# would also swallow KeyboardInterrupt/SystemExit and hide unrelated bugs.
try:
    df = pd.read_csv(DATASET_PATH, encoding="utf-8", delimiter=",", on_bad_lines="skip")
except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError, OSError):
    try:
        df = pd.read_csv(DATASET_PATH, encoding="utf-8", delimiter=";", on_bad_lines="skip")
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError, OSError) as e:
        raise RuntimeError("Unable to read CSV. Check delimiter and format.") from e
# Validate that the columns used for training exist before touching them.
# Collecting all missing names reports every problem at once instead of
# failing on only the first missing column.
required_columns = ["Case Problem", "Feedback"]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise RuntimeError(f"Columns missing from the dataset: {missing_columns}")

# Normalize both text columns (stringify, trim, lowercase) so user input can
# be matched case-insensitively at prediction time.
df["Case Problem"] = df["Case Problem"].astype(str).str.strip().str.lower()
df["Feedback"] = df["Feedback"].astype(str).str.strip().str.lower()
# Fit LabelEncoders for the input ("Case Problem") and target ("Feedback")
# columns, keeping the encoded values alongside the originals in df.
case_problem_encoder = LabelEncoder()
feedback_encoder = LabelEncoder()
df["Case Problem Encoded"] = case_problem_encoder.fit_transform(df["Case Problem"])
df["Feedback Encoded"] = feedback_encoder.fit_transform(df["Feedback"])

# Persist the encoders so the prediction endpoints can reload them from disk.
joblib.dump(case_problem_encoder, "case_problem_encoder.pkl")
joblib.dump(feedback_encoder, "feedback_encoder.pkl")

# Train a RandomForest mapping encoded case problem -> encoded feedback.
# random_state is fixed for a reproducible split and forest.
X = df[["Case Problem Encoded"]]
y = df["Feedback Encoded"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Persist the trained model as well.
joblib.dump(model, "feedback_model.pkl")
print("Model trained successfully!")
class PredictionInput(BaseModel):
    """Request body for the /predict/ endpoint."""

    # Free-text case problem; matched case-insensitively against the
    # categories seen during training.
    case_problem: str
@app.post("/predict/")
async def predict_feedback(data: PredictionInput):
    """Predict feedback for a given case problem.

    Returns a dict with "Predicted Feedback" on success, or an "error" key
    (plus the list of valid categories when the input is unrecognized).
    """
    if model is None:
        return {"error": "Model is not trained yet."}

    # Reload the persisted encoders on each request.
    # NOTE(review): these could be held as module globals instead of being
    # re-read from disk per call — confirm before changing behavior.
    case_problem_encoder = joblib.load("case_problem_encoder.pkl")
    feedback_encoder = joblib.load("feedback_encoder.pkl")

    # Normalize the input the same way the training data was normalized.
    case_problem_lower = data.case_problem.strip().lower()

    # Only categories seen during training can be encoded and predicted.
    if case_problem_lower not in df["Case Problem"].values:
        valid_problems = list(df["Case Problem"].unique())
        return {
            # Plain string: the original f-string had no placeholders (F541).
            "error": "Invalid case problem. Please enter a valid category.",
            "Valid Categories": valid_problems,
        }

    try:
        case_problem_encoded = case_problem_encoder.transform([case_problem_lower])
        prediction = model.predict([[case_problem_encoded[0]]])
        feedback_predicted = feedback_encoder.inverse_transform(prediction)[0]
        return {"Predicted Feedback": feedback_predicted}
    except Exception as e:
        # Surface unexpected failures to the client instead of a bare 500.
        return {"error": str(e)}
def gradio_interface(case_problem):
    """Gradio callback: return the predicted feedback for a case problem.

    Mirrors the /predict/ endpoint but returns plain strings for the UI.
    """
    if model is None:
        return "Model not trained yet."

    # Reload the persisted encoders from disk for this call.
    encoder_in = joblib.load("case_problem_encoder.pkl")
    encoder_out = joblib.load("feedback_encoder.pkl")

    # Match the normalization applied to the training data.
    normalized = case_problem.strip().lower()
    if normalized not in df["Case Problem"].values:
        valid_problems = ", ".join(df["Case Problem"].unique())
        return f"Invalid case problem. Please enter a valid category. Options: {valid_problems}"

    try:
        encoded = encoder_in.transform([normalized])
        predicted = model.predict([[encoded[0]]])
        feedback_predicted = encoder_out.inverse_transform(predicted)[0]
        return f"Predicted Feedback: {feedback_predicted}"
    except Exception as e:
        return f"Error: {str(e)}"
def start_app():
    """Launch the Gradio UI (non-blocking) and then the FastAPI server."""
    gr_interface = gr.Interface(
        fn=gradio_interface,
        inputs=gr.Textbox(label="Enter Case Problem"),
        outputs=gr.Textbox(label="Predicted Feedback"),
        live=False,  # predictions run on Submit, not on every keystroke
        allow_flagging="never",
    )
    # BUG FIX: the original called launch(share=True, debug=True), which
    # blocks the main thread, so uvicorn.run() below was never reached and
    # the FastAPI endpoints never started. prevent_thread_lock=True makes
    # launch() return immediately so both servers run.
    gr_interface.launch(share=True, prevent_thread_lock=True)
    uvicorn.run(app, host="0.0.0.0", port=8000)


if __name__ == "__main__":
    start_app()
|