Garush65 committed on
Commit
cc5d488
·
verified ·
1 Parent(s): bcc64f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -24
app.py CHANGED
@@ -1,32 +1,42 @@
1
- from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
6
- import multiprocessing
7
 
8
  # =========================================================
9
- # FAST MODEL (SUB-SECOND RESPONSE)
10
  # =========================================================
11
 
12
- REPO_ID = "bartowski/Qwen2.5-1.5B-Instruct-GGUF"
13
- FILENAME = "Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
14
 
15
- MODEL_PATH = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
 
 
 
 
 
 
 
16
 
17
  llm = Llama(
18
  model_path=MODEL_PATH,
19
- n_ctx=1536,
20
- n_batch=512,
21
- n_threads=multiprocessing.cpu_count(),
 
 
22
  verbose=False
23
  )
24
 
 
 
25
  # =========================================================
26
  # FASTAPI
27
  # =========================================================
28
 
29
- app = FastAPI(title="Apex Fast Engine")
30
 
31
  app.add_middleware(
32
  CORSMiddleware,
@@ -35,45 +45,66 @@ app.add_middleware(
35
  allow_headers=["*"],
36
  )
37
 
 
 
 
 
38
  class AnalysisRequest(BaseModel):
39
  context: str
40
  query: str
41
 
 
 
 
 
42
  @app.get("/")
43
  def health():
44
- return {"status": "online", "mode": "ultra-fast"}
 
 
 
 
 
45
 
46
  @app.post("/analyze")
47
  def analyze(req: AnalysisRequest):
48
  try:
49
- prompt = f"""<|im_start|>system
 
50
  Ты — Apex.
51
- Отвечай КОРОТКО, ТОЧНО, БЕЗ ВОДЫ.
52
- Если данных нет — скажи "Недостаточно данных".
 
53
  Язык: русский.
54
- <|im_end|>
55
- <|im_start|>user
56
  Контекст:
57
  {req.context}
58
 
59
  Вопрос:
60
  {req.query}
61
- <|im_end|>
62
- <|im_start|>assistant
63
  """
64
 
65
  output = llm(
66
  prompt,
67
- max_tokens=180,
68
- temperature=0.1,
69
- top_p=0.9,
70
- stop=["<|im_end|>"],
71
  echo=False
72
  )
73
 
 
 
74
  return {
75
- "result": output["choices"][0]["text"].strip()
 
76
  }
77
 
78
  except Exception as e:
79
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
1
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# =========================================================
# MODEL (ULTRA FAST FREE-TIER)
# =========================================================

REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
# BUG FIX: this repo's Q4_K_M file is named "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
# (TheBloke's files always carry the model version). The previous value
# "tinyllama-1.1b-chat.Q4_K_M.gguf" does not exist, so hf_hub_download raised
# EntryNotFoundError and the app never started.
FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"

print("[SYSTEM] Downloading TinyLlama...")

# Download the quantized GGUF weights (or reuse them from the local HF cache).
MODEL_PATH = hf_hub_download(
    repo_id=REPO_ID,
    filename=FILENAME
)

print("[SYSTEM] Initializing model...")
22
 
23
# Load the model with settings tuned for the HF free CPU tier.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=512,        # small context window — critical for CPU speed
    n_batch=512,      # FIX: was 1024; a batch larger than n_ctx is never filled,
                      # it only wastes memory, so cap it at the context size
    n_threads=2,      # matches the 2 vCPUs of HF CPU Basic
    use_mmap=True,    # map weights from disk instead of copying into RAM
    use_mlock=False,  # don't pin pages — free tier has limited RAM
    verbose=False
)

print("[SYSTEM] TinyLlama READY")
34
+
35
  # =========================================================
36
  # FASTAPI
37
  # =========================================================
38
 
39
+ app = FastAPI(title="Apex Free Engine")
40
 
41
  app.add_middleware(
42
  CORSMiddleware,
 
45
  allow_headers=["*"],
46
  )
47
 
48
# =========================================================
# REQUEST MODEL
# =========================================================

class AnalysisRequest(BaseModel):
    """Payload for POST /analyze: free-form context plus a question about it."""
    context: str  # source text the model must ground its answer in
    query: str  # the user's question (prompt instructs the model to answer in Russian)
55
 
56
# =========================================================
# ROUTES
# =========================================================

@app.get("/")
def health():
    """Liveness probe: report service, model and hosting-tier metadata."""
    info = {"status": "online", "engine": "Apex"}
    info["model"] = "TinyLlama-1.1B"
    info["tier"] = "HF Free"
    return info
68
 
69
@app.post("/analyze")
def analyze(req: AnalysisRequest):
    """Answer req.query using only req.context via a short TinyLlama prompt.

    Returns {"result": <answer>, "model": "TinyLlama-1.1B"}; any failure
    during generation is surfaced as HTTP 500 with the error text.
    """
    try:
        # Deliberately compact Zephyr-style chat prompt — short prompts keep
        # CPU prompt evaluation within free-tier time limits.
        prompt = f"""<|system|>
Ты — Apex.
Отвечай кратко, логично и по делу.
Используй только данный контекст.
Если данных недостаточно — скажи об этом.
Язык: русский.
</s>
<|user|>
Контекст:
{req.context}

Вопрос:
{req.query}
</s>
<|assistant|>
"""

        generation = llm(
            prompt,
            max_tokens=60,     # tight budget — more is too slow on the free tier
            temperature=0.1,   # near-deterministic output
            top_p=0.8,
            stop=["</s>"],     # TinyLlama's end-of-turn token
            echo=False,
        )

        text = generation["choices"][0]["text"]
        return {"result": text.strip(), "model": "TinyLlama-1.1B"}

    except Exception as e:
        # Report any failure (inference error, malformed output) as a 500.
        raise HTTPException(status_code=500, detail=str(e))