Spaces:

Rid3
/

xtime-api

Sleeping

App Files Files Community

Rid3 committed on Mar 27

Commit

599a0f5

verified ·

1 Parent(s): 5c6c743

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -29

app.py CHANGED Viewed

@@ -21,11 +21,10 @@ REPO_ID = "Rid3/xtime-v1beta-gguf-storage"
 current_llm = None
 current_model_name = ""
-# Модели из твоего репозитория
 MODELS = {
-    "medium": "xtime-v1beta-n-m_1p.gguf",   # 2.8 ГБ — самая стабильная
-    "large":  "xtime-v1beta-q4_K_M.gguf",   # 5.9 ГБ — тяжёлая (пока не используем)
-    "small":  "xtime-v1beta-xp-r_2.gguf"    # очень лёгкая
 }
 def load_model(model_key: str):
@@ -48,55 +47,54 @@ def load_model(model_key: str):
         current_llm = Llama(
             model_path=model_path,
-            n_ctx=2048,                    # уменьшено для стабильности
             n_threads=os.cpu_count() or 4,
-            n_gpu_layers=0,                # только CPU
-            verbose=True,
-            chat_format="llama-3"
         )
         current_model_name = model_key
-        print(f"✅ Успешно загружена модель: {model_key}")
     except Exception as e:
-        print(f"❌ Ошибка загрузки модели {model_key}: {e}")
-        raise HTTPException(status_code=500, detail=f"Не удалось загрузить модель: {str(e)}")
 @app.on_event("startup")
 async def startup_event():
-    # Загружаем medium по умолчанию — она работает стабильно
-    load_model("medium")
 class ChatRequest(BaseModel):
     prompt: str
-    model_type: str = "medium"      # по умолчанию medium
-    api_key: str = ""               # пока можно пустой, позже добавим проверку
 @app.post("/chat")
 async def chat(request: ChatRequest):
-    # Если пользователь попросил другую модель — пытаемся загрузить
     if request.model_type != current_model_name:
         load_model(request.model_type)
     try:
-        output = current_llm.create_chat_completion(
-            messages=[
-                {"role": "system", "content": "Ты полезный и дружелюбный помощник."},
-                {"role": "user", "content": request.prompt}
-            ],
             max_tokens=512,
-            temperature=0.7
         )
-        return {"response": output["choices"][0]["message"]["content"].strip()}
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
 @app.get("/")
 async def health():
-    return {
-        "status": "online",
-        "current_model": current_model_name,
-        "available_models": list(MODELS.keys())
-    }

 current_llm = None
 current_model_name = ""
 MODELS = {
+    "medium": "xtime-v1beta-n-m_1p.gguf",
+    "small":  "xtime-v1beta-xp-r_2.gguf",
+    "large":  "xtime-v1beta-q4_K_M.gguf"
 }
 def load_model(model_key: str):
         current_llm = Llama(
             model_path=model_path,
+            n_ctx=2048,
             n_threads=os.cpu_count() or 4,
+            n_gpu_layers=0,
+            verbose=False,
+            chat_format=None   # важно! для Phi-2 не используем llama-3
         )
         current_model_name = model_key
+        print(f"✅ Модель {model_key} успешно загружена")
     except Exception as e:
+        print(f"❌ Ошибка загрузки {model_key}: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
 @app.on_event("startup")
 async def startup_event():
+    load_model("medium")   # по умолчанию самая стабильная
 class ChatRequest(BaseModel):
     prompt: str
+    model_type: str = "medium"
+    api_key: str = ""
 @app.post("/chat")
 async def chat(request: ChatRequest):
     if request.model_type != current_model_name:
         load_model(request.model_type)
     try:
+        # Для Phi-2 лучше использовать обычный create_completion
+        prompt = f"User: {request.prompt}\nAssistant:"
+        output = current_llm.create_completion(
+            prompt=prompt,
             max_tokens=512,
+            temperature=0.7,
+            stop=["User:", "<|endoftext|>"]
         )
+        response_text = output["choices"][0]["text"].strip()
+        return {"response": response_text}
     except Exception as e:
+        print(f"Ошибка при генерации: {e}")
+        raise HTTPException(status_code=500, detail="Ошибка генерации ответа")
 @app.get("/")
 async def health():
+    return {"status": "online", "model": current_model_name}