| import os |
| from fastapi import FastAPI, Request |
| from fastapi.responses import JSONResponse, FileResponse |
| from huggingface_hub import InferenceClient |
|
|
# Application instance; route handlers below are registered against it.
app = FastAPI()


# Hugging Face API token read from the environment; may be None if unset,
# in which case InferenceClient will make unauthenticated requests.
HF_TOKEN = os.getenv("HF_TOKEN")
# Model used for all /chat completions (uncensored Qwen2.5 72B variant).
MODEL_ID = "huihui-ai/Qwen2.5-72B-Instruct-abliterated"
# Shared client for the Hugging Face Inference API; reused across requests.
client = InferenceClient(token=HF_TOKEN)
|
|
@app.get("/")
async def serve_index():
    """Serve the single-page app shell from the working directory."""
    index_path = "index.html"
    return FileResponse(index_path)
|
|
@app.get("/static/styles.css")
async def serve_css():
    """Serve the stylesheet referenced by index.html."""
    css_path = "styles.css"
    return FileResponse(css_path)
|
|
@app.post("/chat")
async def chat_handler(request: Request):
    """Proxy a chat request to the Hugging Face Inference API.

    Expects a JSON body of the form ``{"messages": [...]}`` (OpenAI-style
    message dicts). Returns a response shaped like the OpenAI chat
    completion payload: ``{"choices": [{"message": {...}}]}``.

    Returns 400 for a malformed JSON body, 500 for upstream/API failures.
    """
    # Parse the body in its own narrow try so that a client sending
    # invalid JSON gets a 400, not a 500. request.json() raises
    # json.JSONDecodeError, which is a subclass of ValueError.
    try:
        body = await request.json()
    except ValueError:
        return JSONResponse({"error": "Request body must be valid JSON"}, status_code=400)

    try:
        messages = body.get("messages", [])
        response = client.chat_completion(model=MODEL_ID, messages=messages, max_tokens=1500)
        return JSONResponse(
            {
                "choices": [
                    {
                        "message": {
                            "role": "assistant",
                            "content": response.choices[0].message.content,
                        }
                    }
                ]
            }
        )
    except Exception as e:
        # Top-level boundary: report upstream/API failures as 500.
        # NOTE(review): str(e) may leak internal details (token hints,
        # upstream URLs) to clients — consider logging the exception and
        # returning a generic message instead.
        return JSONResponse({"error": str(e)}, status_code=500)
|
|