Spaces:

arnab12345678
/

AI-Text-Detector

Build error

App Files Files Community

arnab12345678 committed on Mar 17, 2025

Commit

1e2aaf2

verified ·

1 Parent(s): 10289e6

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -22

app.py CHANGED Viewed

@@ -2,22 +2,22 @@ from fastapi import FastAPI, HTTPException
 import joblib
 import numpy as np
 import pandas as pd
-import os
 from pydantic import BaseModel
 from xgboost import XGBClassifier
-# Load the model with proper handling
 try:
     model = XGBClassifier()
     model.load_model("xgboost_model.json")
-except FileNotFoundError:
-    raise RuntimeError("Error: xgboost_model.json not found. Make sure the file is properly stored.")
-# Load the vectorizer with proper handling
 try:
     vectorizer = joblib.load("vectorizer.joblib")
-except FileNotFoundError:
-    raise RuntimeError("Error: vectorizer.joblib not found. Make sure the file is properly stored.")
 # Initialize FastAPI
 app = FastAPI()
@@ -27,38 +27,30 @@ class TextInput(BaseModel):
     text: str
 # Text cleaning function
-def _text_cleaning(df, text_column, new_column):
-    df[text_column] = df[text_column].fillna("")
-    df[new_column] = df[text_column].str.lower().str.replace(r"[^a-z0-9\s]", "", regex=True)
-    return df
 @app.post("/predict/")
 def predict(data: TextInput):
     test_text = data.text.strip()
     if not test_text:
         raise HTTPException(status_code=400, detail="Input text cannot be empty.")
-    # Create DataFrame for text preprocessing
-    test_df = pd.DataFrame([test_text], columns=['text'])
-    test_df = _text_cleaning(test_df, 'text', 'cleaned_text')
     # TF-IDF transformation
     try:
-        test_tfidf = vectorizer.transform(test_df['cleaned_text'])
-        test_tfidf = test_tfidf.toarray() if hasattr(test_tfidf, "toarray") else test_tfidf.todense()
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"TF-IDF transformation failed: {str(e)}")
     # Compute text length feature
     test_text_length = np.array([[len(test_text)]], dtype=np.float32)
-    # Ensure proper dimensionality before stacking
-    if test_tfidf.shape[0] != test_text_length.shape[0]:
-        raise HTTPException(status_code=500, detail="Feature shape mismatch.")
     # Combine features
-    test_features = np.hstack([test_tfidf, test_text_length])
     # Make prediction
     try:

 import joblib
 import numpy as np
 import pandas as pd
 from pydantic import BaseModel
 from xgboost import XGBClassifier
+import xgboost as xgb
+# Load XGBoost model with error handling
 try:
     model = XGBClassifier()
     model.load_model("xgboost_model.json")
+except Exception as e:
+    raise RuntimeError(f"Error loading model: {str(e)}")
+# Load TF-IDF vectorizer with error handling
 try:
     vectorizer = joblib.load("vectorizer.joblib")
+except Exception as e:
+    raise RuntimeError(f"Error loading vectorizer: {str(e)}")
 # Initialize FastAPI
 app = FastAPI()
     text: str
 # Text cleaning function
+def _text_cleaning(text):
+    return text.lower().strip().replace(r"[^a-z0-9\s]", "", regex=True)
 @app.post("/predict/")
 def predict(data: TextInput):
     test_text = data.text.strip()
     if not test_text:
         raise HTTPException(status_code=400, detail="Input text cannot be empty.")
+    # Preprocess text
+    cleaned_text = _text_cleaning(test_text)
     # TF-IDF transformation
     try:
+        test_tfidf = vectorizer.transform([cleaned_text])
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"TF-IDF transformation failed: {str(e)}")
     # Compute text length feature
     test_text_length = np.array([[len(test_text)]], dtype=np.float32)
     # Combine features
+    test_features = np.hstack([test_tfidf.toarray(), test_text_length])
     # Make prediction
     try: