Spaces:

arnab12345678
/

AI-Text-Detector

Build error

App Files Community

arnab12345678 committed on Mar 17, 2025

Commit

4d77ade

verified ·

1 Parent(s): 3dadb54

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -13

app.py CHANGED Viewed

@@ -1,24 +1,23 @@
 from fastapi import FastAPI, HTTPException
-import pickle
 import numpy as np
 import pandas as pd
 import os
 from pydantic import BaseModel
-# Load the model and vectorizer with error handling
 try:
-    with open("xgboost_model.json", "rb") as f:
-        model = pickle.load(f)
 except FileNotFoundError:
-    raise RuntimeError("Error: xgb_model.pkl not found. Make sure the file is properly stored.")
 try:
-    with open("vectorizer.joblib", "rb") as f:
-        vectorizer = pickle.load(f)
 except FileNotFoundError:
-    raise RuntimeError("Error: vectorizer.pkl not found. Make sure the file is properly stored.")
 # Initialize FastAPI
 app = FastAPI()
@@ -27,7 +26,7 @@ app = FastAPI()
 class TextInput(BaseModel):
     text: str
-# Text cleaning function (improved handling of NaN/empty text)
 def _text_cleaning(df, text_column, new_column):
     df[text_column] = df[text_column].fillna("")
     df[new_column] = df[text_column].str.lower().str.replace(r"[^a-z0-9\s]", "", regex=True)
@@ -44,7 +43,7 @@ def predict(data: TextInput):
     test_df = pd.DataFrame([test_text], columns=['text'])
     test_df = _text_cleaning(test_df, 'text', 'cleaned_text')
-    # TF-IDF transformation with error handling
     try:
         test_tfidf = vectorizer.transform(test_df['cleaned_text'])
         test_tfidf = test_tfidf.toarray() if hasattr(test_tfidf, "toarray") else test_tfidf.todense()
@@ -52,7 +51,11 @@ def predict(data: TextInput):
         raise HTTPException(status_code=500, detail=f"TF-IDF transformation failed: {str(e)}")
     # Compute text length feature
-    test_text_length = np.array([[len(test_text)]])
     # Combine features
     test_features = np.hstack([test_tfidf, test_text_length])

 from fastapi import FastAPI, HTTPException
+import joblib
 import numpy as np
 import pandas as pd
 import os
 from pydantic import BaseModel
+from xgboost import XGBClassifier
+# Load the model with proper handling
 try:
+    model = XGBClassifier()
+    model.load_model("xgboost_model.json")
 except FileNotFoundError:
+    raise RuntimeError("Error: xgboost_model.json not found. Make sure the file is properly stored.")
+# Load the vectorizer with proper handling
 try:
+    vectorizer = joblib.load("vectorizer.joblib")
 except FileNotFoundError:
+    raise RuntimeError("Error: vectorizer.joblib not found. Make sure the file is properly stored.")
 # Initialize FastAPI
 app = FastAPI()
 class TextInput(BaseModel):
     text: str
+# Text cleaning function
 def _text_cleaning(df, text_column, new_column):
     df[text_column] = df[text_column].fillna("")
     df[new_column] = df[text_column].str.lower().str.replace(r"[^a-z0-9\s]", "", regex=True)
     test_df = pd.DataFrame([test_text], columns=['text'])
     test_df = _text_cleaning(test_df, 'text', 'cleaned_text')
+    # TF-IDF transformation
     try:
         test_tfidf = vectorizer.transform(test_df['cleaned_text'])
         test_tfidf = test_tfidf.toarray() if hasattr(test_tfidf, "toarray") else test_tfidf.todense()
         raise HTTPException(status_code=500, detail=f"TF-IDF transformation failed: {str(e)}")
     # Compute text length feature
+    test_text_length = np.array([[len(test_text)]], dtype=np.float32)
+    # Ensure proper dimensionality before stacking
+    if test_tfidf.shape[0] != test_text_length.shape[0]:
+        raise HTTPException(status_code=500, detail="Feature shape mismatch.")
     # Combine features
     test_features = np.hstack([test_tfidf, test_text_length])