| import os, json, asyncio |
| from fastapi import FastAPI, Request, HTTPException |
| from fastapi.responses import StreamingResponse |
| from fastapi.middleware.cors import CORSMiddleware |
| from huggingface_hub import hf_hub_download |
| from llama_cpp import Llama |
| from prompts import build_system_prompt |
| from search_engine import search_web |
|
|
# Application object. CORS is configured with wildcards for origins,
# methods and headers.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)


# Hugging Face repository and file name handed to hf_hub_download().
MODEL_REPO = "bartowski/Qwen_Qwen3.6-35B-A3B-GGUF"
MODEL_FILE = "Qwen_Qwen3.6-35B-A3B-IQ3_M.gguf"

# Shared model handle; stays None until load_model() assigns it.
llm = None
|
|
def load_model():
    """Fetch the model file and initialise the module-level ``llm`` handle.

    The call is a no-op when ``llm`` has already been set, so the model is
    loaded at most once per process.
    """
    global llm
    if llm is not None:
        return

    print("⬇️ جاري تحميل النموذج...")
    weights_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)

    # Loader settings, forwarded to Llama as keyword arguments.
    llama_options = {
        "n_ctx": 1536,
        "n_threads": 4,
        "n_gpu_layers": 0,
        "use_mmap": True,
        "verbose": False,
    }
    llm = Llama(model_path=weights_path, **llama_options)
    print("✅ تم تحميل النموذج بنجاح.")
|
|
@app.on_event("startup")
def startup() -> None:
    """Startup hook: make sure the model is loaded before serving requests."""
    load_model()
|
|
# Created lazily so the lock is instantiated inside the running event loop.
_generation_lock = None


def _get_generation_lock():
    """Return the process-wide lock that serialises use of the shared ``llm``."""
    global _generation_lock
    if _generation_lock is None:
        _generation_lock = asyncio.Lock()
    return _generation_lock


async def generate_stream(messages: list, mode: str):
    """Stream the model's reply to the last message as newline-delimited JSON.

    Args:
        messages: Chat messages. Only the ``content`` of the last entry is
            placed in the prompt.
        mode: Passed to ``build_system_prompt``. When it equals ``"search"``,
            ``search_web`` is also run on the user message and its result is
            appended to the prompt under a ``[SEARCH RESULTS]`` header.

    Yields:
        One string per generated chunk: ``{"token": "<text>"}`` followed by
        a newline.

    Raises:
        IndexError, KeyError, TypeError: On first iteration, if ``messages``
            is empty or its last entry cannot be indexed by ``"content"``.
    """
    # Local import: these helpers ship with FastAPI, which the module
    # already depends on.
    from fastapi.concurrency import iterate_in_threadpool, run_in_threadpool

    system_prompt = build_system_prompt(mode)
    user_msg = messages[-1]["content"]

    search_section = ""
    if mode == "search":
        # search_web is invoked synchronously, so run it in a worker thread
        # instead of stalling the event loop while it works.
        search_res = await run_in_threadpool(search_web, user_msg)
        search_section = f"[SEARCH RESULTS]\n{search_res}\n"

    # Build the prompt once; the search section is empty outside search mode.
    # NOTE(review): confirm the <|system|>/<|user|>/<|assistant|> markers and
    # the stop strings below match the chat template of the loaded model.
    prompt = (
        f"<|system|>\n{system_prompt}\n"
        f"<|user|>\n{user_msg}\n"
        f"{search_section}"
        "<|assistant|>\n"
    )

    # One Llama instance is shared by every request, so only one generation
    # may run at a time; other requests wait here instead of interleaving.
    async with _get_generation_lock():
        chunks = llm(
            prompt,
            max_tokens=1200,
            stop=["<|user|>", "<|end|>"],
            stream=True,
            temperature=0.7,
        )
        # Each step of the blocking token iterator runs in a worker thread,
        # which keeps the event loop free for other requests.
        async for chunk in iterate_in_threadpool(chunks):
            yield json.dumps({"token": chunk["choices"][0]["text"]}) + "\n"
|
|
@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """Validate the request body and stream the generated reply.

    The body must be a JSON object. ``messages`` must be a non-empty list
    whose last entry is an object with a ``content`` key. ``mode`` is
    optional and defaults to ``"chat"``.

    Returns:
        A ``StreamingResponse`` wrapping ``generate_stream(messages, mode)``.

    Raises:
        HTTPException: 400 when the body is not valid JSON, is not an
            object, or ``messages`` is missing, empty or malformed.
    """
    try:
        data = await request.json()
    except ValueError as exc:
        # Covers malformed JSON and undecodable bytes, which would
        # otherwise surface as an unhandled 500.
        raise HTTPException(400, "Request body must be valid JSON") from exc
    if not isinstance(data, dict):
        raise HTTPException(400, "Request body must be a JSON object")

    messages = data.get("messages", [])
    mode = data.get("mode", "chat")
    if not messages:
        raise HTTPException(400, "No messages provided")

    # Reject malformed input now: once streaming has started the 200 status
    # line is already sent and an error can no longer be reported cleanly.
    if not isinstance(messages, list):
        raise HTTPException(400, "'messages' must be a list")
    last_message = messages[-1]
    if not isinstance(last_message, dict) or "content" not in last_message:
        raise HTTPException(400, "The last message must be an object with 'content'")

    # NOTE(review): the stream is newline-delimited JSON; the media type is
    # left as-is because clients may depend on it.
    return StreamingResponse(generate_stream(messages, mode), media_type="application/json")
|
|
@app.get("/health")
def health():
    """Return a static status payload that includes the model file name."""
    payload = {"status": "ok", "model": MODEL_FILE}
    return payload