Garush65 commited on
Commit
bcc64f9
·
verified ·
1 Parent(s): 58c0c73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -71
app.py CHANGED
@@ -3,129 +3,77 @@ from pydantic import BaseModel
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
6
- import os
7
  import multiprocessing
8
 
9
  # =========================================================
10
- # MODEL CONFIG (WORLD-LEVEL BALANCE)
11
  # =========================================================
12
 
13
- REPO_ID = "bartowski/Qwen2.5-3B-Instruct-GGUF"
14
- FILENAME = "Qwen2.5-3B-Instruct-Q4_K_M.gguf"
15
 
16
- print(f"[SYSTEM] Downloading model: {FILENAME}")
17
-
18
- try:
19
- MODEL_PATH = hf_hub_download(
20
- repo_id=REPO_ID,
21
- filename=FILENAME
22
- )
23
- except Exception as e:
24
- raise RuntimeError(f"Model download failed: {e}")
25
-
26
- # =========================================================
27
- # LLM INITIALIZATION
28
- # =========================================================
29
-
30
- print("[SYSTEM] Initializing Apex Engine...")
31
 
32
  llm = Llama(
33
  model_path=MODEL_PATH,
34
- n_ctx=2048,
35
  n_batch=512,
36
  n_threads=multiprocessing.cpu_count(),
37
  verbose=False
38
  )
39
 
40
- print("[SYSTEM] Apex Engine READY")
41
-
42
  # =========================================================
43
- # FASTAPI APP
44
  # =========================================================
45
 
46
- app = FastAPI(
47
- title="Apex Engine",
48
- version="1.0",
49
- description="High-performance reasoning backend"
50
- )
51
 
52
  app.add_middleware(
53
  CORSMiddleware,
54
  allow_origins=["*"],
55
- allow_credentials=True,
56
  allow_methods=["*"],
57
  allow_headers=["*"],
58
  )
59
 
60
- # =========================================================
61
- # DATA MODELS
62
- # =========================================================
63
-
64
  class AnalysisRequest(BaseModel):
65
  context: str
66
  query: str
67
 
68
- # =========================================================
69
- # ROUTES
70
- # =========================================================
71
-
72
  @app.get("/")
73
  def health():
74
- return {
75
- "status": "online",
76
- "engine": "Apex",
77
- "model": "Qwen2.5-3B",
78
- "mode": "high-reasoning"
79
- }
80
 
81
  @app.post("/analyze")
82
  def analyze(req: AnalysisRequest):
83
  try:
84
- prompt = f"""
85
- <|im_start|>system
86
- Ты Apex, аналитический ИИ мирового уровня.
87
-
88
- СТРОГИЕ ПРАВИЛА:
89
- - Используй ТОЛЬКО данный контекст
90
- - НЕ додумывай факты
91
- - Если данных недостаточно — скажи об этом
92
- - Отвечай чётко, логично и по делу
93
- - После ответа выполни краткую самопроверку логики
94
-
95
- Язык ответа: русский
96
- Формат: структурированный текст
97
  <|im_end|>
98
-
99
  <|im_start|>user
100
- КОНТЕКСТ:
101
  {req.context}
102
 
103
- ВОПРОС:
104
  {req.query}
105
  <|im_end|>
106
-
107
  <|im_start|>assistant
108
  """
109
 
110
  output = llm(
111
  prompt,
112
- max_tokens=400,
113
- temperature=0.15,
114
  top_p=0.9,
115
  stop=["<|im_end|>"],
116
  echo=False
117
  )
118
 
119
- answer = output["choices"][0]["text"].strip()
120
-
121
  return {
122
- "result": answer,
123
- "model": "Qwen2.5-3B",
124
- "engine": "Apex"
125
  }
126
 
127
  except Exception as e:
128
- raise HTTPException(
129
- status_code=500,
130
- detail=f"Inference error: {str(e)}"
131
- )
 
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
 
6
  import multiprocessing
7
 
8
# =========================================================
# FAST MODEL (SUB-SECOND RESPONSE)
# =========================================================

REPO_ID = "bartowski/Qwen2.5-1.5B-Instruct-GGUF"
FILENAME = "Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"

# Fetch the GGUF weights from the Hugging Face Hub (cached locally after the
# first run). Fail fast with a clear, actionable error instead of an opaque
# traceback if the download fails (network outage, auth, missing file).
try:
    MODEL_PATH = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
except Exception as e:
    raise RuntimeError(f"Model download failed: {e}") from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Local llama.cpp inference engine.
# n_ctx is kept small (1536 tokens) to keep prompt processing fast;
# n_threads saturates all available CPU cores.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=1536,
    n_batch=512,
    n_threads=multiprocessing.cpu_count(),
    verbose=False,
)
24
 
 
 
25
# =========================================================
# FASTAPI
# =========================================================

app = FastAPI(title="Apex Fast Engine")
 
 
 
 
30
 
31
# CORS: wide open on purpose (any origin, method, header) — this backend is
# meant to be called from arbitrary browser frontends. Tighten allow_origins
# before exposing anything sensitive.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
37
 
 
 
 
 
38
class AnalysisRequest(BaseModel):
    """Payload for /analyze: a free-form context plus a question about it."""

    context: str
    query: str
41
 
 
 
 
 
42
  @app.get("/")
43
  def health():
44
+ return {"status": "online", "mode": "ultra-fast"}
 
 
 
 
 
45
 
46
  @app.post("/analyze")
47
  def analyze(req: AnalysisRequest):
48
  try:
49
+ prompt = f"""<|im_start|>system
50
+ Ты — Apex.
51
+ Отвечай КОРОТКО, ТОЧНО, БЕЗ ВОДЫ.
52
+ Если данных нет — скажи "Недостаточно данных".
53
+ Язык: русский.
 
 
 
 
 
 
 
 
54
  <|im_end|>
 
55
  <|im_start|>user
56
+ Контекст:
57
  {req.context}
58
 
59
+ Вопрос:
60
  {req.query}
61
  <|im_end|>
 
62
  <|im_start|>assistant
63
  """
64
 
65
  output = llm(
66
  prompt,
67
+ max_tokens=180,
68
+ temperature=0.1,
69
  top_p=0.9,
70
  stop=["<|im_end|>"],
71
  echo=False
72
  )
73
 
 
 
74
  return {
75
+ "result": output["choices"][0]["text"].strip()
 
 
76
  }
77
 
78
  except Exception as e:
79
+ raise HTTPException(status_code=500, detail=str(e))