THEZYZSTUDIO committed on
Commit
6f9f476
·
verified ·
1 Parent(s): d2bcd1f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # [سيرفر النموذج] app.py
2
+ import os, json, asyncio
3
+ from fastapi import FastAPI, Request, HTTPException
4
+ from fastapi.responses import StreamingResponse
5
+ from fastapi.middleware.cors import CORSMiddleware
6
+ from huggingface_hub import hf_hub_download
7
+ from llama_cpp import Llama
8
+ from prompts import build_system_prompt
9
+ from search_engine import search_web
10
+
11
# ASGI application. CORS is left fully open: any origin, method and header.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Hugging Face Hub repository and file name of the GGUF weights to serve.
MODEL_REPO = "bartowski/Qwen_Qwen3.6-35B-A3B-GGUF"
MODEL_FILE = "Qwen_Qwen3.6-35B-A3B-IQ3_M.gguf"

# Shared model instance; stays None until load_model() assigns it.
llm = None
17
+
18
def load_model():
    """Fetch the GGUF weights and initialise the module-level ``llm`` once.

    The first call downloads ``MODEL_FILE`` from ``MODEL_REPO`` through
    ``hf_hub_download`` and builds a ``Llama`` instance from the returned
    path. Later calls return immediately because ``llm`` is already set.
    """
    global llm
    if llm is not None:
        # Model already initialised; nothing left to do.
        return

    print("⬇️ جاري تحميل النموذج...")
    weights_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
    llama_options = dict(
        model_path=weights_path,
        n_ctx=2048,
        n_threads=4,
        n_gpu_layers=0,  # no layers are offloaded to a GPU
        use_mmap=True,
        verbose=False,
    )
    llm = Llama(**llama_options)
    print("✅ تم تحميل النموذج بنجاح.")
32
+
33
@app.on_event("startup")
def startup():
    """Startup hook: make sure the shared model is loaded."""
    load_model()
36
+
37
# Created lazily so the lock is always bound to the event loop that serves
# requests, regardless of the Python version in use.
_generation_lock = None


def _get_generation_lock():
    """Return the lock that serialises use of the shared ``llm`` instance."""
    global _generation_lock
    if _generation_lock is None:
        _generation_lock = asyncio.Lock()
    return _generation_lock


async def generate_stream(messages: list, mode: str):
    """Stream the model's reply to the last message as newline-delimited JSON.

    Only ``messages[-1]['content']`` is sent to the model; earlier messages
    are ignored. When ``mode == "search"`` the result of ``search_web`` for
    that text is appended to the user turn under a ``[SEARCH RESULTS]``
    marker.

    Args:
        messages: Non-empty list of message dicts; the last one must have a
            ``'content'`` key.
        mode: Passed to ``build_system_prompt``; the value ``"search"``
            additionally enables the web search step.

    Yields:
        One line per generated chunk: ``{"token": <text>}`` serialised as
        JSON and terminated by ``"\\n"``.

    Raises:
        KeyError, TypeError, IndexError: If the last message does not have
            the expected shape.
    """
    # Imported locally so this block does not depend on a top-of-file change.
    from fastapi.concurrency import iterate_in_threadpool, run_in_threadpool

    system_prompt = build_system_prompt(mode)
    query = messages[-1]['content']

    user_turn = query
    if mode == "search":
        # search_web is a blocking call; run it off the event loop so other
        # requests (e.g. /health) keep being served while it runs.
        search_res = await run_in_threadpool(search_web, query)
        user_turn = f"{query}\n[SEARCH RESULTS]\n{search_res}"

    # NOTE(review): the <|system|>/<|user|>/<|assistant|> markers are
    # hand-written here; confirm they match the chat template the configured
    # model expects.
    prompt = f"<|system|>\n{system_prompt}\n<|user|>\n{user_turn}\n<|assistant|>\n"

    # A single model instance is shared by every request, so only one
    # generation may run at a time; concurrent callers wait here.
    async with _get_generation_lock():
        token_stream = llm(
            prompt,
            max_tokens=1500,
            stop=["<|user|>", "<|end|>"],
            stream=True,
            temperature=0.7,
        )
        # Each next() on the stream performs blocking inference, so pull the
        # chunks from a worker thread instead of stalling the event loop.
        async for token in iterate_in_threadpool(token_stream):
            yield json.dumps({"token": token["choices"][0]["text"]}) + "\n"
49
+
50
@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """Validate a chat request and stream the model's reply.

    The JSON body is expected to be an object with:
      * ``messages``: non-empty list whose last item is an object with a
        ``content`` field;
      * ``mode`` (optional, default ``"chat"``): forwarded to
        ``generate_stream``.

    Returns:
        A ``StreamingResponse`` fed by ``generate_stream``.

    Raises:
        HTTPException: 400 when the body is not valid JSON, is not a JSON
            object, has no messages, or the last message is malformed.
    """
    try:
        data = await request.json()
    except ValueError as exc:
        # JSON decoding and byte-decoding errors are both ValueError subclasses.
        raise HTTPException(400, "Request body must be valid JSON") from exc
    if not isinstance(data, dict):
        raise HTTPException(400, "Request body must be a JSON object")

    messages = data.get("messages", [])
    mode = data.get("mode", "chat")
    if not messages:
        raise HTTPException(400, "No messages provided")
    if not isinstance(messages, list):
        raise HTTPException(400, "'messages' must be a list")

    # Check the shape here: once streaming starts the 200 status has already
    # been sent, so a failure inside the generator cannot become a 4xx.
    last_message = messages[-1]
    if not isinstance(last_message, dict) or "content" not in last_message:
        raise HTTPException(
            400, "The last message must be an object with a 'content' field"
        )

    # NOTE(review): the body is a sequence of newline-terminated JSON objects
    # rather than one JSON document; the media type is kept unchanged so
    # existing clients are not affected.
    return StreamingResponse(generate_stream(messages, mode), media_type="application/json")
58
+
59
@app.get("/health")
def health():
    """Report a static OK status together with the configured model file name."""
    payload = {"status": "ok", "model": MODEL_FILE}
    return payload