Olivier-52 committed on
Commit ·
fe10113
1
Parent(s): 7d09baa
Fix app.py
Browse files- app.py +97 -73
- requirements.txt +3 -1
app.py
CHANGED
|
@@ -1,90 +1,114 @@
|
|
| 1 |
import os
|
| 2 |
-
import uvicorn
|
| 3 |
-
import pandas as pd
|
| 4 |
-
from pydantic import BaseModel
|
| 5 |
-
from fastapi import FastAPI, File, UploadFile
|
| 6 |
import mlflow
|
| 7 |
-
|
|
|
|
|
|
|
| 8 |
from dotenv import load_dotenv
|
|
|
|
| 9 |
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
# Climate Fake News Detector(https://github.com/Olivier-52/Fake_news_detector.git)
|
| 13 |
-
|
| 14 |
-
This API allows you to use a Machine Learning model to detect fake news related to climate change.
|
| 15 |
-
|
| 16 |
-
## Machine-Learning
|
| 17 |
-
|
| 18 |
-
Where you can:
|
| 19 |
-
* `/predict` : prediction for a single value
|
| 20 |
-
|
| 21 |
-
Check out documentation for more information on each endpoint.
|
| 22 |
-
"""
|
| 23 |
-
|
| 24 |
-
tags_metadata = [
|
| 25 |
-
{
|
| 26 |
-
"name": "Predictions",
|
| 27 |
-
"description": "Endpoints that uses our Machine Learning model",
|
| 28 |
-
},
|
| 29 |
-
]
|
| 30 |
-
|
| 31 |
load_dotenv()
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
os.environ["AWS_ACCESS_KEY_ID"] = os.getenv("AWS_ACCESS_KEY_ID")
|
| 34 |
os.environ["AWS_SECRET_ACCESS_KEY"] = os.getenv("AWS_SECRET_ACCESS_KEY")
|
| 35 |
|
| 36 |
-
|
| 37 |
-
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
|
| 38 |
-
|
| 39 |
-
mlflow.set_tracking_uri("https://olivier-52-ml-flow.hf.space")
|
| 40 |
-
|
| 41 |
-
model = mlflow.sklearn.load_model("models:/climate-fake-news-detector-model-XGBoost-v1@production")
|
| 42 |
-
|
| 43 |
app = FastAPI(
|
| 44 |
-
title="
|
| 45 |
-
description=
|
| 46 |
-
version="1.0"
|
| 47 |
-
|
| 48 |
-
"name": "Olivier",
|
| 49 |
-
"url": "https://github.com/Olivier-52/Fake_news_detector",
|
| 50 |
-
},
|
| 51 |
-
openapi_tags=tags_metadata,)
|
| 52 |
-
|
| 53 |
-
@app.get("/")
|
| 54 |
-
def index():
|
| 55 |
-
"""Return a message to the user.
|
| 56 |
-
|
| 57 |
-
This endpoint does not take any parameters and returns a message
|
| 58 |
-
to the user. It is used to test the API.
|
| 59 |
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
"""
|
| 63 |
-
return "Hello world! Go to /docs to try the API."
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
class PredictionFeatures(BaseModel):
|
| 67 |
text: str
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
if __name__ == "__main__":
|
| 90 |
-
|
|
|
|
|
|
| 1 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import mlflow
|
| 3 |
+
import pickle
|
| 4 |
+
from fastapi import FastAPI, HTTPException, status
|
| 5 |
+
from pydantic import BaseModel
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
+
from typing import Optional
|
| 8 |
|
| 9 |
+
# Charge les variables d'environnement
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
load_dotenv()
|
| 11 |
|
| 12 |
+
# Configuration des variables d'environnement
|
| 13 |
+
MLFLOW_TRACKING_APP_URI = os.getenv("MLFLOW_TRACKING_APP_URI", "https://olivier-52-ml-flow.hf.space")
|
| 14 |
+
MODEL_NAME = os.getenv("MODEL_NAME", "climate-fake-news-detector-model-XGBoost-v1")
|
| 15 |
+
STAGE = os.getenv("STAGE", "production")
|
| 16 |
+
|
| 17 |
+
# Configure les identifiants AWS pour accéder au bucket S3
|
| 18 |
os.environ["AWS_ACCESS_KEY_ID"] = os.getenv("AWS_ACCESS_KEY_ID")
|
| 19 |
os.environ["AWS_SECRET_ACCESS_KEY"] = os.getenv("AWS_SECRET_ACCESS_KEY")
|
| 20 |
|
| 21 |
+
# Initialise FastAPI
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
app = FastAPI(
|
| 23 |
+
title="Climate Fake News Detector API",
|
| 24 |
+
description="API pour détecter les fake news sur le climat avec un modèle XGBoost.",
|
| 25 |
+
version="1.0.0"
|
| 26 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
+
# Modèle pour les données d'entrée
|
| 29 |
+
class TextInput(BaseModel):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
text: str
|
| 31 |
|
| 32 |
+
# Variables globales pour stocker le modèle et le vectorizer
|
| 33 |
+
model = None
|
| 34 |
+
vectorizer = None
|
| 35 |
+
|
| 36 |
+
# Fonction pour charger le modèle depuis MLflow
|
| 37 |
+
def load_model():
|
| 38 |
+
global model
|
| 39 |
+
try:
|
| 40 |
+
# Configure l'URI de tracking MLflow
|
| 41 |
+
mlflow.set_tracking_uri(MLFLOW_TRACKING_APP_URI)
|
| 42 |
+
|
| 43 |
+
# Charge le modèle depuis MLflow
|
| 44 |
+
model_uri = f"models:/{MODEL_NAME}@{STAGE}"
|
| 45 |
+
model = mlflow.sklearn.load_model(model_uri)
|
| 46 |
+
print("Modèle chargé avec succès depuis MLflow.")
|
| 47 |
+
except Exception as e:
|
| 48 |
+
print(f"Erreur lors du chargement du modèle depuis MLflow : {e}")
|
| 49 |
+
raise HTTPException(
|
| 50 |
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
| 51 |
+
detail=f"Impossible de charger le modèle depuis MLflow : {e}"
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
# Fonction pour charger le vectorizer depuis MLflow
|
| 55 |
+
def load_vectorizer():
|
| 56 |
+
try:
|
| 57 |
+
# Initialise le client MLflow
|
| 58 |
+
client = mlflow.MlflowClient(MLFLOW_TRACKING_APP_URI)
|
| 59 |
+
|
| 60 |
+
# Récupère les informations sur le modèle
|
| 61 |
+
model_info = client.get_model_version_by_alias(MODEL_NAME, STAGE)
|
| 62 |
+
run_id = model_info.run_id
|
| 63 |
+
|
| 64 |
+
# Télécharge le fichier vectorizer.pkl depuis MLflow
|
| 65 |
+
local_path = mlflow.artifacts.download_artifacts(
|
| 66 |
+
artifact_path="vectorizer.pkl",
|
| 67 |
+
run_id=run_id
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
# Charge le vectorizer depuis le fichier
|
| 71 |
+
with open(local_path, "rb") as f:
|
| 72 |
+
vectorizer = pickle.load(f)
|
| 73 |
+
|
| 74 |
+
return vectorizer
|
| 75 |
+
except Exception as e:
|
| 76 |
+
print(f"Erreur lors du chargement du vectorizer : {e}")
|
| 77 |
+
raise HTTPException(
|
| 78 |
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
| 79 |
+
detail=f"Impossible de charger le vectorizer : {e}"
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
+
load_model()
|
| 83 |
+
vectorizer = load_vectorizer()
|
| 84 |
|
| 85 |
+
@app.get("/")
|
| 86 |
+
async def read_root():
|
| 87 |
+
return {
|
| 88 |
+
"message": "Bienvenue sur l'API Climate Fake News Detector !",
|
| 89 |
+
"documentation": "Consultez la documentation de l'API à l'adresse /docs."
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
@app.post("/predict")
|
| 93 |
+
async def predict(input_data: TextInput):
|
| 94 |
+
global model, vectorizer
|
| 95 |
+
if model is None or vectorizer is None:
|
| 96 |
+
raise HTTPException(
|
| 97 |
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
| 98 |
+
detail="Le modèle ou le vectorizer n'est pas chargé."
|
| 99 |
+
)
|
| 100 |
+
|
| 101 |
+
try:
|
| 102 |
+
X_vectorized = vectorizer.transform([input_data.text]).toarray()
|
| 103 |
+
prediction = model.predict(X_vectorized)
|
| 104 |
+
return {"prediction": int(prediction[0])}
|
| 105 |
|
| 106 |
+
except Exception as e:
|
| 107 |
+
raise HTTPException(
|
| 108 |
+
status_code=status.HTTP_400_BAD_REQUEST,
|
| 109 |
+
detail=f"Erreur lors de la prédiction : {e}"
|
| 110 |
+
)
|
| 111 |
|
| 112 |
if __name__ == "__main__":
|
| 113 |
+
import uvicorn
|
| 114 |
+
uvicorn.run(app, host="localhost", port=8000)
|
requirements.txt
CHANGED
|
@@ -11,4 +11,6 @@ openpyxl
|
|
| 11 |
boto3
|
| 12 |
python-multipart
|
| 13 |
dotenv
|
| 14 |
-
xgboost
|
|
|
|
|
|
|
|
|
| 11 |
boto3
|
| 12 |
python-multipart
|
| 13 |
dotenv
|
| 14 |
+
xgboost
|
| 15 |
+
pickle
|
| 16 |
+
os
|