arnab12345678 committed on
Commit
4d77ade
·
verified ·
1 Parent(s): 3dadb54

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -13
app.py CHANGED
@@ -1,24 +1,23 @@
1
  from fastapi import FastAPI, HTTPException
2
- import pickle
3
  import numpy as np
4
  import pandas as pd
5
  import os
6
  from pydantic import BaseModel
 
7
 
8
-
9
-
10
- # Load the model and vectorizer with error handling
11
  try:
12
- with open("xgboost_model.json", "rb") as f:
13
- model = pickle.load(f)
14
  except FileNotFoundError:
15
- raise RuntimeError("Error: xgb_model.pkl not found. Make sure the file is properly stored.")
16
 
 
17
  try:
18
- with open("vectorizer.joblib", "rb") as f:
19
- vectorizer = pickle.load(f)
20
  except FileNotFoundError:
21
- raise RuntimeError("Error: vectorizer.pkl not found. Make sure the file is properly stored.")
22
 
23
  # Initialize FastAPI
24
  app = FastAPI()
@@ -27,7 +26,7 @@ app = FastAPI()
27
  class TextInput(BaseModel):
28
  text: str
29
 
30
- # Text cleaning function (improved handling of NaN/empty text)
31
  def _text_cleaning(df, text_column, new_column):
32
  df[text_column] = df[text_column].fillna("")
33
  df[new_column] = df[text_column].str.lower().str.replace(r"[^a-z0-9\s]", "", regex=True)
@@ -44,7 +43,7 @@ def predict(data: TextInput):
44
  test_df = pd.DataFrame([test_text], columns=['text'])
45
  test_df = _text_cleaning(test_df, 'text', 'cleaned_text')
46
 
47
- # TF-IDF transformation with error handling
48
  try:
49
  test_tfidf = vectorizer.transform(test_df['cleaned_text'])
50
  test_tfidf = test_tfidf.toarray() if hasattr(test_tfidf, "toarray") else test_tfidf.todense()
@@ -52,7 +51,11 @@ def predict(data: TextInput):
52
  raise HTTPException(status_code=500, detail=f"TF-IDF transformation failed: {str(e)}")
53
 
54
  # Compute text length feature
55
- test_text_length = np.array([[len(test_text)]])
 
 
 
 
56
 
57
  # Combine features
58
  test_features = np.hstack([test_tfidf, test_text_length])
 
1
  from fastapi import FastAPI, HTTPException
2
+ import joblib
3
  import numpy as np
4
  import pandas as pd
5
  import os
6
  from pydantic import BaseModel
7
+ from xgboost import XGBClassifier
8
 
9
+ # Load the model with proper handling
 
 
10
  try:
11
+ model = XGBClassifier()
12
+ model.load_model("xgboost_model.json")
13
  except FileNotFoundError:
14
+ raise RuntimeError("Error: xgboost_model.json not found. Make sure the file is properly stored.")
15
 
16
+ # Load the vectorizer with proper handling
17
  try:
18
+ vectorizer = joblib.load("vectorizer.joblib")
 
19
  except FileNotFoundError:
20
+ raise RuntimeError("Error: vectorizer.joblib not found. Make sure the file is properly stored.")
21
 
22
  # Initialize FastAPI
23
  app = FastAPI()
 
26
  class TextInput(BaseModel):
27
  text: str
28
 
29
+ # Text cleaning function
30
  def _text_cleaning(df, text_column, new_column):
31
  df[text_column] = df[text_column].fillna("")
32
  df[new_column] = df[text_column].str.lower().str.replace(r"[^a-z0-9\s]", "", regex=True)
 
43
  test_df = pd.DataFrame([test_text], columns=['text'])
44
  test_df = _text_cleaning(test_df, 'text', 'cleaned_text')
45
 
46
+ # TF-IDF transformation
47
  try:
48
  test_tfidf = vectorizer.transform(test_df['cleaned_text'])
49
  test_tfidf = test_tfidf.toarray() if hasattr(test_tfidf, "toarray") else test_tfidf.todense()
 
51
  raise HTTPException(status_code=500, detail=f"TF-IDF transformation failed: {str(e)}")
52
 
53
  # Compute text length feature
54
+ test_text_length = np.array([[len(test_text)]], dtype=np.float32)
55
+
56
+ # Ensure proper dimensionality before stacking
57
+ if test_tfidf.shape[0] != test_text_length.shape[0]:
58
+ raise HTTPException(status_code=500, detail="Feature shape mismatch.")
59
 
60
  # Combine features
61
  test_features = np.hstack([test_tfidf, test_text_length])