THEZYZSTUDIO committed on
Commit
096876c
·
verified ·
1 Parent(s): a71cc63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # [سيرفر النموذج] app.py
2
  import os, json, asyncio
3
  from fastapi import FastAPI, Request, HTTPException
4
  from fastapi.responses import StreamingResponse
@@ -22,7 +21,7 @@ def load_model():
22
  model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
23
  llm = Llama(
24
  model_path=model_path,
25
- n_ctx=2048,
26
  n_threads=4,
27
  n_gpu_layers=0,
28
  use_mmap=True,
@@ -36,14 +35,14 @@ def startup():
36
 
37
  async def generate_stream(messages: list, mode: str):
38
  system_prompt = build_system_prompt(mode)
39
- prompt = f"<|system|>\n{system_prompt}\n<|user|>\n{messages[-1]['content']}\n<|assistant|>\n"
 
40
 
41
  if mode == "search":
42
- query = messages[-1]['content']
43
- search_res = search_web(query)
44
- prompt = f"<|system|>\n{system_prompt}\n<|user|>\n{query}\n[SEARCH RESULTS]\n{search_res}\n<|assistant|>\n"
45
 
46
- for token in llm(prompt, max_tokens=1500, stop=["<|user|>", "<|end|>"], stream=True, temperature=0.7):
47
  yield json.dumps({"token": token["choices"][0]["text"]}) + "\n"
48
  await asyncio.sleep(0.01)
49
 
 
 
1
  import os, json, asyncio
2
  from fastapi import FastAPI, Request, HTTPException
3
  from fastapi.responses import StreamingResponse
 
21
  model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
22
  llm = Llama(
23
  model_path=model_path,
24
+ n_ctx=1536,
25
  n_threads=4,
26
  n_gpu_layers=0,
27
  use_mmap=True,
 
35
 
36
  async def generate_stream(messages: list, mode: str):
37
  system_prompt = build_system_prompt(mode)
38
+ user_msg = messages[-1]['content']
39
+ prompt = f"<|system|>\n{system_prompt}\n<|user|>\n{user_msg}\n<|assistant|>\n"
40
 
41
  if mode == "search":
42
+ search_res = search_web(user_msg)
43
+ prompt = f"<|system|>\n{system_prompt}\n<|user|>\n{user_msg}\n[SEARCH RESULTS]\n{search_res}\n<|assistant|>\n"
 
44
 
45
+ for token in llm(prompt, max_tokens=1200, stop=["<|user|>", "<|end|>"], stream=True, temperature=0.7):
46
  yield json.dumps({"token": token["choices"][0]["text"]}) + "\n"
47
  await asyncio.sleep(0.01)
48