Spaces:
Sleeping
Sleeping
File size: 2,408 Bytes
1c56f8e a122f91 1c56f8e a122f91 1c56f8e a122f91 1c56f8e a122f91 1c56f8e 46f621c 9cace32 1c56f8e 410898d a122f91 410898d a122f91 410898d 1c56f8e a122f91 1c56f8e a122f91 1c56f8e a122f91 1c56f8e a122f91 1c56f8e a2c5f90 1c56f8e a122f91 1c56f8e a122f91 1c56f8e a122f91 1c56f8e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | from fastapi import FastAPI, Request
from contextlib import asynccontextmanager
from fastapi.middleware.cors import CORSMiddleware
from app.routes import router as api_router
from app.model import load_model
from dotenv import load_dotenv
import os
import time
import logging
# Load environment settings via python-dotenv before any configuration is read.
load_dotenv()

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model repository served by the API. An unset, empty or whitespace-only
# MODEL_REPO_ID falls back to the default, which the original author chose as
# a model that is natively supported and free on the Hugging Face serverless
# Inference API.
raw_repo_id = os.getenv("MODEL_REPO_ID", "")
REPO_ID = raw_repo_id.strip() or "Qwen/Qwen3.5-0.8B"
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Wrap the application's lifetime with startup and shutdown steps.

    Before yielding: prints a startup banner, calls ``load_model`` for
    ``REPO_ID`` and stores the result on ``app.state.llm`` so that route
    handlers can reach it. After the yield: prints a shutdown banner.
    """
    banner = "=" * 60

    print("\n" + banner, flush=True)
    print(f"🚀 INITIALIZING CHAT API: Setup remote LLM ({REPO_ID})", flush=True)
    print(banner + "\n", flush=True)

    # Keep the LLM on the application state; routes read it from there.
    app.state.llm = load_model(repo_id=REPO_ID)
    print("\n✅ API is LIVE on port 7860! Ready for requests.\n", flush=True)

    yield

    print("\n" + banner, flush=True)
    print("👋 Shutting down API. Goodbye!", flush=True)
# The application object; startup and shutdown work is delegated to `lifespan`.
app = FastAPI(
    lifespan=lifespan,
    title="Multimodal Chat API",
    version="1.0.0",
    description="Production-ready Chat API powered by LangChain and HuggingFaceEndpoint.",
)
# CORS_ORIGINS is a comma-separated list of allowed origins (default "*").
# Entries are trimmed and blank ones dropped, so a value such as
# "https://a.example, https://b.example" matches both origins instead of
# producing " https://b.example", which can never equal an Origin header.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ORIGINS", "*").split(",")
    if origin.strip()
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    # A wildcard origin must not be combined with credentialed requests
    # (see the FastAPI CORS documentation), so credentials are only enabled
    # when the allowed origins are listed explicitly.
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.middleware("http")
async def request_logging_middleware(request: Request, call_next):
    """Log method, path, status code and latency for each HTTP request.

    Requests to ``/api/health`` are not logged. The response returned by
    ``call_next`` is passed through unchanged.

    Args:
        request: The incoming request.
        call_next: Callable that forwards the request down the stack and
            returns the response.

    Returns:
        The response produced by ``call_next``.
    """
    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump when the system clock is adjusted (unlike time.time()).
    start = time.perf_counter()
    response = await call_next(request)
    duration_ms = (time.perf_counter() - start) * 1000

    if request.url.path != "/api/health":
        # Lazy %-style arguments: the message is only built if INFO is enabled.
        logger.info(
            "%s %s → %s [%.1fms]",
            request.method,
            request.url.path,
            response.status_code,
            duration_ms,
        )
    return response
# Mount the router imported from app.routes under the "/api" prefix.
app.include_router(api_router, prefix="/api")
@app.get("/", include_in_schema=False)
async def root():
    """Return a welcome payload with the docs URL and the main endpoint paths.

    The route is registered with ``include_in_schema=False``.
    """
    endpoints = {"health": "/api/health", "chat": "/api/chat"}
    payload = {
        "message": "Welcome to the API!",
        "docs_url": "/docs",
        "endpoints": endpoints,
    }
    return payload
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn when run directly.
    import uvicorn

    server_options = {
        "host": "0.0.0.0",
        "port": 7860,
        "workers": 1,
        "log_level": "info",
    }
    uvicorn.run("app.main:app", **server_options)
|