# NOTE: hosting-page scrape residue removed here (status banner: "Spaces: Sleeping").
import logging
import os
import sqlite3
from contextlib import closing

import anyio
import uvicorn
from fastapi import FastAPI, Depends, HTTPException, Security
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from llama_cpp import Llama
from pydantic import BaseModel
# --- INITIALIZATION ---
app = FastAPI(title="Kwen Foundation Official API")
# CORS: allow_methods=["*"] is required so the browser's 'OPTIONS' preflight succeeds.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://hydrogenclient.github.io"],  # only this front-end origin is allowed
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
logging.basicConfig(level=logging.INFO, format="%(message)s")
logger = logging.getLogger(__name__)
security = HTTPBearer()  # expects "Authorization: Bearer <key>" on protected routes
BASE_DIR = os.path.dirname(os.path.abspath(__file__))  # directory containing this file
MODEL_PATH = os.path.join(BASE_DIR, "kwen-4b-q4_k_m.gguf")  # GGUF weights shipped next to the code
DB_PATH = "keys.db"  # SQLite key store — resolved against the process CWD, not BASE_DIR
# --- MODEL LOADING ---
# `llm` stays None when the weights file is absent; endpoints check for that.
llm = None
if not os.path.exists(MODEL_PATH):
    logger.error(f"❌ Model NOT found at {MODEL_PATH}")
else:
    size_gb = os.path.getsize(MODEL_PATH) / (1024**3)
    logger.info(f"✅ Model found! Size: {size_gb:.2f} GB")
    # CPU-only configuration: no GPU layers, small context, two threads.
    llm = Llama(
        model_path=MODEL_PATH,
        n_ctx=2048,
        n_threads=2,
        n_gpu_layers=0,
        offload_kqv=False,
        verbose=True,
    )
    logger.info("Kwen Model loaded.")
# --- AUTH SETUP ---
# SECURITY NOTE(review): a live API key is hard-coded in source. Anyone who can
# read this file can call the API as "Public_Guest". Move it to an environment
# variable / secret store and rotate this value.
DEFAULT_KEY = "kwen_NjdT7VQauEyDfzVvPLMIQfwUuduJWT6j"

def init_db():
    """Create the `keys` table if needed and seed the default public key.

    Idempotent, so it is safe to run on every startup. Uses `closing` so the
    connection is released even if an execute raises — the original leaked
    the connection on any error between connect() and close().
    """
    with closing(sqlite3.connect(DB_PATH)) as conn:
        conn.execute("CREATE TABLE IF NOT EXISTS keys (key TEXT PRIMARY KEY, user TEXT)")
        cur = conn.execute("SELECT user FROM keys WHERE key = ?", (DEFAULT_KEY,))
        if not cur.fetchone():
            conn.execute("INSERT INTO keys (key, user) VALUES (?, ?)", (DEFAULT_KEY, "Public_Guest"))
        conn.commit()

init_db()
class ChatInput(BaseModel):
    """Request body for the chat endpoint."""
    prompt: str  # raw user message; the streamer wraps it in a ChatML template
    max_tokens: int = 512  # generation cap forwarded to llama.cpp
    temperature: float = 0.6  # sampling temperature forwarded to llama.cpp
def verify_key(auth: HTTPAuthorizationCredentials = Security(security)):
    """FastAPI dependency: validate the Bearer token against the key store.

    Returns the user name bound to the key.
    Raises HTTPException(403) when the key is not in the database.
    """
    provided_key = auth.credentials.strip()
    # `closing` guarantees the connection is released even if execute raises
    # (the original leaked it on that path).
    with closing(sqlite3.connect(DB_PATH)) as conn:
        row = conn.execute("SELECT user FROM keys WHERE key = ?", (provided_key,)).fetchone()
    if not row:
        # Lazy %-formatting: the message is only built if the record is emitted.
        logger.warning("AUTH FAILED: Access denied for key ending in ...%s", provided_key[-4:])
        raise HTTPException(status_code=403, detail="Invalid API Key")
    return row[0]
# --- STREAMING ENGINE ---
async def kwen_streamer(prompt, max_tokens, temperature):
    """Yield completion tokens for `prompt` one at a time.

    Fix vs. original: the original only *created* the llama.cpp generator in a
    worker thread, then iterated it on the event loop — with stream=True every
    `next()` performs the actual inference step, so the event loop was blocked
    for the whole generation. Here each `next()` runs in a worker thread too,
    keeping the server responsive while streaming.
    """
    # ChatML-style prompt framing.
    formatted = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
    yield ""  # Initial heartbeat

    def run_inference():
        # stream=True returns a generator; creating it is cheap, the heavy
        # work happens per-next().
        return llm(
            formatted,
            max_tokens=max_tokens,
            temperature=temperature,
            stop=["<|im_end|>", "user:", "assistant:"],
            stream=True
        )

    _DONE = object()  # sentinel marking generator exhaustion
    try:
        stream = await anyio.to_thread.run_sync(run_inference)
        while True:
            # Run the blocking inference step off the event loop.
            chunk = await anyio.to_thread.run_sync(next, stream, _DONE)
            if chunk is _DONE:
                break
            token = chunk["choices"][0]["text"]
            if token:
                yield token
                await anyio.sleep(0.01)  # gentle pacing between flushed tokens
    except Exception as e:
        # Surface the failure in-band so the client sees why the stream died.
        logger.error("Stream Error: %s", e)
        yield f"\n[Stream Error: {e}]"
# --- ENDPOINTS ---
def health():
    """Return the liveness payload for the service.

    NOTE(review): no @app.get(...) decorator is visible here, so this handler
    is never registered with FastAPI — it looks like the decorator was lost
    when the file was reformatted. Confirm and restore e.g. @app.get("/").
    """
    payload = dict(status="running", model="Kwen-4B", database="active")
    return payload
async def chat(data: ChatInput, user=Depends(verify_key)):
    """Stream a chat completion for an authorized caller.

    Raises HTTPException(503) while the model is not loaded.

    NOTE(review): no @app.post(...) decorator is visible, so this handler
    appears unregistered — likely stripped when the file was reformatted;
    confirm and restore e.g. @app.post("/chat").
    NOTE(review): media_type is "text/event-stream" but the streamer yields
    bare tokens without "data: ...\n\n" SSE framing — verify the client
    reads this as a raw chunked stream.
    """
    if not llm:
        raise HTTPException(status_code=503, detail="Model Offline")
    # Lazy %-formatting: don't build the message unless INFO is emitted.
    logger.info("Authorized Request from: %s", user)
    return StreamingResponse(
        kwen_streamer(data.prompt, data.max_tokens, data.temperature),
        media_type="text/event-stream"
    )
if __name__ == "__main__":
    # Local entry point: bind all interfaces on port 7860
    # (presumably the hosting platform's expected port — confirm).
    uvicorn.run(app, host="0.0.0.0", port=7860)