Spaces:

Rid3
/

xtime-api

Sleeping

App Files Files Community

Rid3 committed on Mar 27

Commit

5c6c743

verified ·

1 Parent(s): 6b07912

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -11

app.py CHANGED Viewed

@@ -21,10 +21,11 @@ REPO_ID = "Rid3/xtime-v1beta-gguf-storage"
 current_llm = None
 current_model_name = ""
 MODELS = {
-    "medium": "xtime-v1beta-n-m_1p.gguf",
-    "large": "xtime-v1beta-q4_K_M.gguf",
-    "small": "xtime-v1beta-xp-r_2.gguf"
 }
 def load_model(model_key: str):
@@ -47,20 +48,55 @@ def load_model(model_key: str):
         current_llm = Llama(
             model_path=model_path,
-            n_ctx=4096,           # increased, if memory allows
             n_threads=os.cpu_count() or 4,
-            n_gpu_layers=0,       # explicitly CPU
-            verbose=False,
             chat_format="llama-3"
         )
         current_model_name = model_key
-        print(f"✅ Model {model_key} loaded successfully")
     except Exception as e:
-        print(f"❌ Error loading model: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
 @app.on_event("startup")
 async def startup_event():
-    load_model("large")   # or "medium", if large is too heavy
-# ... rest of the code unchanged ...

 current_llm = None
 current_model_name = ""
+# Models from your repository
 MODELS = {
+    "medium": "xtime-v1beta-n-m_1p.gguf",   # 2.8 GB — the most stable
+    "large":  "xtime-v1beta-q4_K_M.gguf",   # 5.9 GB — heavy (not used for now)
+    "small":  "xtime-v1beta-xp-r_2.gguf"    # very lightweight
 }
 def load_model(model_key: str):
         current_llm = Llama(
             model_path=model_path,
+            n_ctx=2048,                    # reduced for stability
             n_threads=os.cpu_count() or 4,
+            n_gpu_layers=0,                # CPU only
+            verbose=True,
             chat_format="llama-3"
         )
         current_model_name = model_key
+        print(f"✅ Успешно загружена модель: {model_key}")
     except Exception as e:
+        print(f"❌ Ошибка загрузки модели {model_key}: {e}")
+        raise HTTPException(status_code=500, detail=f"Не удалось загрузить модель: {str(e)}")
 @app.on_event("startup")
 async def startup_event():
+    # Load medium by default — it runs reliably
+    load_model("medium")
+class ChatRequest(BaseModel):
+    # Request body taken by the chat() handler.
+    prompt: str
+    model_type: str = "medium"      # defaults to medium
+    api_key: str = ""               # may be empty for now; validation will be added later
+@app.post("/chat")
+async def chat(request: ChatRequest):
+    # Answers request.prompt with a single chat completion and returns
+    # {"response": <stripped text of the first choice>}.
+    # NOTE(review): request.api_key is never read in this handler.
+    # If the user asked for a different model, try to load it
+    if request.model_type != current_model_name:
+        load_model(request.model_type)
+    try:
+        # NOTE(review): this is a synchronous call inside an async handler;
+        # presumably it blocks the event loop while generating — confirm.
+        output = current_llm.create_chat_completion(
+            messages=[
+                {"role": "system", "content": "Ты полезный и дружелюбный помощник."},
+                {"role": "user", "content": request.prompt}
+            ],
+            max_tokens=512,
+            temperature=0.7
+        )
+        return {"response": output["choices"][0]["message"]["content"].strip()}
+    except Exception as e:
+        # Any failure during generation is reported as HTTP 500 with the error text.
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/")
+async def health():
+    # Status endpoint: reports a fixed "online" status, the value of
+    # current_model_name, and the keys of MODELS.
+    return {
+        "status": "online",
+        "current_model": current_model_name,
+        "available_models": list(MODELS.keys())
+    }