Rid3 committed on
Commit
9560ef7
·
verified ·
1 Parent(s): 461bc4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -22
app.py CHANGED
@@ -1,18 +1,26 @@
1
  from fastapi import FastAPI, HTTPException
 
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
  import gc
6
- import os
7
 
8
  app = FastAPI(title="RID3 QUANTUM AI API")
9
 
10
- # Конфигурация твоего хранилища
 
 
 
 
 
 
 
 
 
11
  REPO_ID = "Rid3/xtime-v1beta-gguf-storage"
12
  current_llm = None
13
  current_model_name = ""
14
 
15
- # Твои файлы (из скриншота)
16
  MODELS = {
17
  "medium": "xtime-v1beta-n-m_1p.gguf",
18
  "large": "xtime-v1beta-q4_K_M.gguf",
@@ -21,52 +29,40 @@ MODELS = {
21
 
22
  def load_model(model_key: str):
23
  global current_llm, current_model_name
24
-
25
  filename = MODELS.get(model_key)
26
  if not filename:
27
- raise HTTPException(status_code=404, detail="Модель не найдена в списке")
28
-
29
  if current_model_name == model_key:
30
  return
31
-
32
- print(f"Загрузка модели {filename} из {REPO_ID}...")
33
 
34
  if current_llm is not None:
35
  del current_llm
36
  gc.collect()
37
 
38
  try:
39
- # Скачиваем файл из твоего репозитория моделей
40
  model_path = hf_hub_download(repo_id=REPO_ID, filename=filename)
41
-
42
- # Инициализация модели
43
- current_llm = Llama(
44
- model_path=model_path,
45
- n_ctx=2048,
46
- n_threads=4 # Оптимально для бесплатных CPU на HF
47
- )
48
  current_model_name = model_key
49
  except Exception as e:
50
  raise HTTPException(status_code=500, detail=str(e))
51
 
52
- # При старте грузим самую маленькую, чтобы Space быстро запустился
53
  @app.on_event("startup")
54
  async def startup_event():
55
- load_model("small")
56
 
57
  class ChatRequest(BaseModel):
58
  prompt: str
59
- model_type: str = "small" # Можно присылать "small", "medium" или "large"
60
 
61
  @app.post("/chat")
62
  async def chat(request: ChatRequest):
63
  if request.model_type != current_model_name:
64
  load_model(request.model_type)
65
-
66
  output = current_llm(
67
- f"Q: {request.prompt} A:",
68
  max_tokens=256,
69
- stop=["Q:", "\n"],
70
  echo=False
71
  )
72
  return {"response": output["choices"][0]["text"].strip()}
 
1
  from fastapi import FastAPI, HTTPException
2
+ from fastapi.middleware.cors import CORSMiddleware # Добавь это
3
  from pydantic import BaseModel
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
6
  import gc
 
7
 
8
  app = FastAPI(title="RID3 QUANTUM AI API")
9
 
10
+ # --- НАСТРОЙКА CORS ---
11
+ app.add_middleware(
12
+ CORSMiddleware,
13
+ allow_origins=["*"], # Разрешить запросы с любых сайтов
14
+ allow_credentials=True,
15
+ allow_methods=["*"],
16
+ allow_headers=["*"],
17
+ )
18
+ # ----------------------
19
+
20
  REPO_ID = "Rid3/xtime-v1beta-gguf-storage"
21
  current_llm = None
22
  current_model_name = ""
23
 
 
24
  MODELS = {
25
  "medium": "xtime-v1beta-n-m_1p.gguf",
26
  "large": "xtime-v1beta-q4_K_M.gguf",
 
29
 
30
  def load_model(model_key: str):
31
  global current_llm, current_model_name
 
32
  filename = MODELS.get(model_key)
33
  if not filename:
34
+ raise HTTPException(status_code=404, detail="Модель не найдена")
 
35
  if current_model_name == model_key:
36
  return
 
 
37
 
38
  if current_llm is not None:
39
  del current_llm
40
  gc.collect()
41
 
42
  try:
 
43
  model_path = hf_hub_download(repo_id=REPO_ID, filename=filename)
44
+ current_llm = Llama(model_path=model_path, n_ctx=2048, n_threads=4)
 
 
 
 
 
 
45
  current_model_name = model_key
46
  except Exception as e:
47
  raise HTTPException(status_code=500, detail=str(e))
48
 
 
49
  @app.on_event("startup")
50
  async def startup_event():
51
+ load_model("large")
52
 
53
  class ChatRequest(BaseModel):
54
  prompt: str
55
+ model_type: str = "large"
56
 
57
  @app.post("/chat")
58
  async def chat(request: ChatRequest):
59
  if request.model_type != current_model_name:
60
  load_model(request.model_type)
61
+
62
  output = current_llm(
63
+ f"User: {request.prompt}\nAI:",
64
  max_tokens=256,
65
+ stop=["User:", "\n"],
66
  echo=False
67
  )
68
  return {"response": output["choices"][0]["text"].strip()}