subbunanepalli committed on
Commit
2ff8394
·
verified ·
1 Parent(s): e13ee34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -12
app.py CHANGED
@@ -7,7 +7,8 @@ import os
7
  from sklearn.feature_extraction.text import TfidfVectorizer
8
  from sklearn.preprocessing import LabelEncoder
9
  from sklearn.multioutput import MultiOutputClassifier
10
-
 
11
  # --- Configuration ---
12
  LABEL_COLUMNS = [
13
  "Red_Flag_Reason", "Maker_Action", "Escalation_Level",
@@ -103,40 +104,46 @@ def health_check():
103
  @app.post("/train")
104
  def train():
105
  try:
106
- os.makedirs(config.MODEL_SAVE_DIR, exist_ok=True)
107
  df = pd.read_csv(config.DATA_PATH)
108
 
109
- # Features and Labels
110
  X = df[config.TEXT_COLUMN]
111
  y = df[config.LABEL_COLUMNS]
112
 
113
  # Split the data
114
  X_train, X_test, y_train, y_test = train_test_split(
115
- X, y, test_size=0.2, random_state=42
116
  )
117
 
118
- # TF-IDF vectorization
119
- vectorizer = TfidfVectorizer()
 
 
 
 
120
  X_train_vec = vectorizer.fit_transform(X_train)
121
  X_test_vec = vectorizer.transform(X_test)
122
 
123
- # Train MultiOutput Logistic Regression
124
  model = MultiOutputClassifier(LogisticRegression(max_iter=1000))
125
  model.fit(X_train_vec, y_train)
126
 
127
- # Predict and evaluate
128
  y_pred = model.predict(X_test_vec)
 
 
129
  accuracy = {
130
- col: accuracy_score(y_test[col], [pred[i] for pred in y_pred])
131
- for i, col in enumerate(y.columns)
132
  }
133
 
134
  # Save model and vectorizer
135
  joblib.dump(model, config.MODEL_PATH)
136
- joblib.dump(vectorizer, config.TFIDF_PATH)
137
 
138
  return {
139
- "message": " Training completed successfully.",
140
  "accuracy": accuracy
141
  }
142
 
 
7
  from sklearn.feature_extraction.text import TfidfVectorizer
8
  from sklearn.preprocessing import LabelEncoder
9
  from sklearn.multioutput import MultiOutputClassifier
10
+ import config
11
+ from sklearn.metrics import accuracy_score
12
  # --- Configuration ---
13
  LABEL_COLUMNS = [
14
  "Red_Flag_Reason", "Maker_Action", "Escalation_Level",
 
104
  @app.post("/train")
105
  def train():
106
  try:
107
+ # Load data
108
  df = pd.read_csv(config.DATA_PATH)
109
 
110
+ # Prepare features and labels
111
  X = df[config.TEXT_COLUMN]
112
  y = df[config.LABEL_COLUMNS]
113
 
114
  # Split the data
115
  X_train, X_test, y_train, y_test = train_test_split(
116
+ X, y, test_size=config.TEST_SIZE, random_state=config.RANDOM_STATE
117
  )
118
 
119
+ # TF-IDF vectorizer
120
+ vectorizer = TfidfVectorizer(
121
+ max_features=config.TFIDF_MAX_FEATURES,
122
+ ngram_range=config.NGRAM_RANGE,
123
+ stop_words='english' if config.USE_STOPWORDS else None
124
+ )
125
  X_train_vec = vectorizer.fit_transform(X_train)
126
  X_test_vec = vectorizer.transform(X_test)
127
 
128
+ # Train model
129
  model = MultiOutputClassifier(LogisticRegression(max_iter=1000))
130
  model.fit(X_train_vec, y_train)
131
 
132
+ # Predict on test data
133
  y_pred = model.predict(X_test_vec)
134
+
135
+ # Calculate accuracy for each label
136
  accuracy = {
137
+ label: accuracy_score(y_test[label], [pred[i] for pred in y_pred])
138
+ for i, label in enumerate(config.LABEL_COLUMNS)
139
  }
140
 
141
  # Save model and vectorizer
142
  joblib.dump(model, config.MODEL_PATH)
143
+ joblib.dump(vectorizer, config.TFIDF_VECTORIZER_PATH)
144
 
145
  return {
146
+ "message": "Training completed successfully.",
147
  "accuracy": accuracy
148
  }
149