"""FastAPI inference service exposing a HuggingFace text-classification model.

Endpoints:
    GET  /ready    -- readiness probe.
    POST /predict  -- classify the posted text via the loaded pipeline.
"""

import os

import numpy as np  # Explicit numpy import
import torch
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Fix numpy initialization: force numpy to load before the model.
np.zeros(1)

app = FastAPI()

# Disable xformers-style fused attention kernels if needed.
torch.backends.cuda.enable_flash_sdp(False)
torch.backends.cuda.enable_mem_efficient_sdp(False)

# Configure paths and device: cache model downloads in a writable location
# (e.g. when the container filesystem is otherwise read-only).
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
os.makedirs(os.environ["TRANSFORMERS_CACHE"], exist_ok=True)


@app.get("/ready")
def readiness_check():
    """Readiness probe: report that the service process is up."""
    return {"status": "ready"}


model_name = "win2win/3-epochs-classifier-ver2"

try:
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Create pipeline with explicit classes so model and tokenizer versions
    # are pinned together rather than resolved independently by the pipeline.
    classifier = pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        device="cuda" if torch.cuda.is_available() else "cpu",
    )
    print("Model loaded successfully!")
except Exception as e:
    # Log the failure, then re-raise so the process fails fast instead of
    # serving requests with no model.
    print(f"Error loading model: {str(e)}")
    raise


class Request(BaseModel):
    """Body schema for /predict."""

    # Raw text to classify.
    text: str


@app.post("/predict")
async def predict(request: Request):
    """Classify ``request.text`` and return the pipeline's label/score output.

    BUG FIX: the original called ``model(request.text)`` — the bare
    ``AutoModelForSequenceClassification`` cannot accept a raw string
    (it expects tokenized tensors), so every request failed and the
    configured ``classifier`` pipeline was never used. Route through
    the pipeline, which handles tokenization and post-processing.
    """
    return classifier(request.text)


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)