File size: 2,408 Bytes
1c56f8e
a122f91
1c56f8e
a122f91
 
 
 
 
 
1c56f8e
a122f91
1c56f8e
a122f91
1c56f8e
 
46f621c
 
9cace32
1c56f8e
 
 
410898d
a122f91
410898d
 
a122f91
 
410898d
 
1c56f8e
a122f91
 
1c56f8e
 
a122f91
 
1c56f8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a122f91
 
 
 
 
1c56f8e
 
a122f91
1c56f8e
a2c5f90
 
 
 
 
 
 
 
 
 
 
1c56f8e
a122f91
1c56f8e
a122f91
1c56f8e
 
a122f91
1c56f8e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from fastapi import FastAPI, Request
from contextlib import asynccontextmanager
from fastapi.middleware.cors import CORSMiddleware
from app.routes import router as api_router
from app.model import load_model
from dotenv import load_dotenv
import os
import time
import logging

# Load settings via python-dotenv before any environment lookups below.
load_dotenv()

# Configure root logging at INFO and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model repository id: taken from MODEL_REPO_ID when that variable holds a
# non-blank value (surrounding whitespace removed), otherwise the default below.
# NOTE(review): the default was chosen by the original author as a model served
# by the Hugging Face serverless Inference API — confirm it is still available.
raw_repo_id = os.getenv("MODEL_REPO_ID", "")
REPO_ID = raw_repo_id.strip() or "Qwen/Qwen3.5-0.8B"

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application startup and shutdown.

    On startup, prints a banner, loads the model for ``REPO_ID`` and stores
    it on ``app.state.llm``. After the application stops, prints a farewell
    message.

    Args:
        app: The FastAPI application whose state receives the loaded model.
    """
    divider = "=" * 60

    # Startup banner.
    print(f"\n{divider}", flush=True)
    print(f"🚀 INITIALIZING CHAT API: Setup remote LLM ({REPO_ID})", flush=True)
    print(f"{divider}\n", flush=True)

    # Keep the model handle on the application state so routes can reach it.
    llm = load_model(repo_id=REPO_ID)
    app.state.llm = llm

    print("\n✅ API is LIVE on port 7860! Ready for requests.\n", flush=True)
    yield

    # Everything below runs once the application is shutting down.
    print(f"\n{divider}", flush=True)
    print("👋 Shutting down API. Goodbye!", flush=True)

app = FastAPI(
    title="Multimodal Chat API",
    description="Production-ready Chat API powered by LangChain and HuggingFaceEndpoint.",
    version="1.0.0",
    lifespan=lifespan,
)


def _parse_cors_origins(raw):
    """Return the origins listed in a comma-separated string.

    Each entry is stripped of surrounding whitespace and empty entries are
    dropped, so a value such as ``"https://a.example, https://b.example"``
    yields two usable origins instead of one with a leading space (which
    would never equal a browser's ``Origin`` header).

    Args:
        raw: Comma-separated origins, e.g. the ``CORS_ORIGINS`` value.

    Returns:
        A list of origin strings; empty when ``raw`` contains no entries.
    """
    return [origin.strip() for origin in raw.split(",") if origin.strip()]


# Allowed origins come from CORS_ORIGINS (comma-separated); default is "*".
# NOTE(review): FastAPI's CORS documentation advises against combining a
# wildcard origin with allow_credentials=True — set CORS_ORIGINS to explicit
# origins in production, or confirm credentialed requests are not needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=_parse_cors_origins(os.getenv("CORS_ORIGINS", "*")),
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.middleware("http")
async def request_logging_middleware(request: Request, call_next):
    """Log method, path, status code and latency of each HTTP request.

    Requests to ``/api/health`` are passed through without being logged.

    Args:
        request: The incoming HTTP request.
        call_next: Awaitable callable that forwards the request and returns
            the response.

    Returns:
        The response returned by ``call_next``, unmodified.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or turn negative) if the system clock is adjusted during the request.
    start = time.perf_counter()
    response = await call_next(request)
    duration_ms = (time.perf_counter() - start) * 1000
    if request.url.path != "/api/health":
        # Lazy %-style arguments: the message is only formatted when emitted.
        logger.info(
            "%s %s → %s [%.1fms]",
            request.method,
            request.url.path,
            response.status_code,
            duration_ms,
        )
    return response

# Register the router imported from app.routes; its paths are served under /api.
app.include_router(api_router, prefix="/api")

@app.get("/", include_in_schema=False)
async def root():
    """Return a welcome payload pointing to the docs and the API endpoints.

    The route is registered with ``include_in_schema=False``.
    """
    endpoint_index = {
        "health": "/api/health",
        "chat": "/api/chat",
    }
    welcome = {
        "message": "Welcome to the API!",
        "docs_url": "/docs",
        "endpoints": endpoint_index,
    }
    return welcome

if __name__ == "__main__":
    # Script entry point: serve the application with uvicorn when this file is
    # executed directly. uvicorn is imported here so it is only required then.
    import uvicorn

    server_options = {
        "host": "0.0.0.0",
        "port": 7860,
        "workers": 1,
        "log_level": "info",
    }
    uvicorn.run("app.main:app", **server_options)