arnab12345678 committed on
Commit
1e2aaf2
·
verified ·
1 Parent(s): 10289e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -22
app.py CHANGED
@@ -2,22 +2,22 @@ from fastapi import FastAPI, HTTPException
2
  import joblib
3
  import numpy as np
4
  import pandas as pd
5
- import os
6
  from pydantic import BaseModel
7
  from xgboost import XGBClassifier
 
8
 
9
- # Load the model with proper handling
10
  try:
11
  model = XGBClassifier()
12
  model.load_model("xgboost_model.json")
13
- except FileNotFoundError:
14
- raise RuntimeError("Error: xgboost_model.json not found. Make sure the file is properly stored.")
15
 
16
- # Load the vectorizer with proper handling
17
  try:
18
  vectorizer = joblib.load("vectorizer.joblib")
19
- except FileNotFoundError:
20
- raise RuntimeError("Error: vectorizer.joblib not found. Make sure the file is properly stored.")
21
 
22
  # Initialize FastAPI
23
  app = FastAPI()
@@ -27,38 +27,30 @@ class TextInput(BaseModel):
27
  text: str
28
 
29
  # Text cleaning function
30
- def _text_cleaning(df, text_column, new_column):
31
- df[text_column] = df[text_column].fillna("")
32
- df[new_column] = df[text_column].str.lower().str.replace(r"[^a-z0-9\s]", "", regex=True)
33
- return df
34
 
35
  @app.post("/predict/")
36
  def predict(data: TextInput):
37
  test_text = data.text.strip()
38
-
39
  if not test_text:
40
  raise HTTPException(status_code=400, detail="Input text cannot be empty.")
41
 
42
- # Create DataFrame for text preprocessing
43
- test_df = pd.DataFrame([test_text], columns=['text'])
44
- test_df = _text_cleaning(test_df, 'text', 'cleaned_text')
45
 
46
  # TF-IDF transformation
47
  try:
48
- test_tfidf = vectorizer.transform(test_df['cleaned_text'])
49
- test_tfidf = test_tfidf.toarray() if hasattr(test_tfidf, "toarray") else test_tfidf.todense()
50
  except Exception as e:
51
  raise HTTPException(status_code=500, detail=f"TF-IDF transformation failed: {str(e)}")
52
 
53
  # Compute text length feature
54
  test_text_length = np.array([[len(test_text)]], dtype=np.float32)
55
 
56
- # Ensure proper dimensionality before stacking
57
- if test_tfidf.shape[0] != test_text_length.shape[0]:
58
- raise HTTPException(status_code=500, detail="Feature shape mismatch.")
59
-
60
  # Combine features
61
- test_features = np.hstack([test_tfidf, test_text_length])
62
 
63
  # Make prediction
64
  try:
 
2
  import joblib
3
  import numpy as np
4
  import pandas as pd
 
5
  from pydantic import BaseModel
6
  from xgboost import XGBClassifier
7
+ import xgboost as xgb
8
 
9
+ # Load XGBoost model with error handling
10
  try:
11
  model = XGBClassifier()
12
  model.load_model("xgboost_model.json")
13
+ except Exception as e:
14
+ raise RuntimeError(f"Error loading model: {str(e)}")
15
 
16
+ # Load TF-IDF vectorizer with error handling
17
  try:
18
  vectorizer = joblib.load("vectorizer.joblib")
19
+ except Exception as e:
20
+ raise RuntimeError(f"Error loading vectorizer: {str(e)}")
21
 
22
  # Initialize FastAPI
23
  app = FastAPI()
 
27
  text: str
28
 
29
  # Text cleaning function
30
+ def _text_cleaning(text):
31
+ return text.lower().strip().replace(r"[^a-z0-9\s]", "", regex=True)
 
 
32
 
33
  @app.post("/predict/")
34
  def predict(data: TextInput):
35
  test_text = data.text.strip()
36
+
37
  if not test_text:
38
  raise HTTPException(status_code=400, detail="Input text cannot be empty.")
39
 
40
+ # Preprocess text
41
+ cleaned_text = _text_cleaning(test_text)
 
42
 
43
  # TF-IDF transformation
44
  try:
45
+ test_tfidf = vectorizer.transform([cleaned_text])
 
46
  except Exception as e:
47
  raise HTTPException(status_code=500, detail=f"TF-IDF transformation failed: {str(e)}")
48
 
49
  # Compute text length feature
50
  test_text_length = np.array([[len(test_text)]], dtype=np.float32)
51
 
 
 
 
 
52
  # Combine features
53
+ test_features = np.hstack([test_tfidf.toarray(), test_text_length])
54
 
55
  # Make prediction
56
  try: