Spaces:

Rid3
/

xtime-api

Sleeping

App Files Files Community

Rid3 committed on Mar 28

Commit

3808e95

verified ·

1 Parent(s): 0e057d9

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -63

app.py CHANGED Viewed

@@ -6,9 +6,9 @@ from pydantic import BaseModel
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
-app = FastAPI(title="Xtime GGUF Remote API")
-# Настройка CORS для удаленного подключения
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -17,87 +17,77 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# Глобальные переменные для хранения текущей модели в памяти
-current_llm = None
-current_model_id = "" # format: repo_id/filename
 class ChatRequest(BaseModel):
-    repo_id: str          # Ссылка на репозиторий (напр. "bartowski/Llama-3.2-3B-Instruct-GGUF")
-    filename: str         # Имя файла (напр. "Llama-3.2-3B-Instruct-Q4_K_M.gguf")
-    prompt: str           # Текст пользователя
     system_prompt: str = "You are a helpful assistant."
     max_tokens: int = 512
     temperature: float = 0.7
-def load_model_if_new(repo_id: str, filename: str):
-    """Загружает модель, если она еще не в памяти или если пришла новая ссылка"""
-    global current_llm, current_model_id
-    new_model_id = f"{repo_id}/{filename}"
-    # Если модель уже загружена, просто выходим
-    if current_llm is not None and current_model_id == new_model_id:
-        return
-    print(f"--- Загрузка новой модели: {new_model_id} ---")
-    # Очистка памяти перед загрузкой новой модели
-    if current_llm is not None:
-        del current_llm
-        gc.collect()
-    try:
-        # Скачивание файла с Hugging Face (использует кэш, если файл уже есть)
-        model_path = hf_hub_download(repo_id=repo_id, filename=filename)
-        # Инициализация Llama
-        current_llm = Llama(
-            model_path=model_path,
-            n_ctx=2048,
-            n_threads=os.cpu_count() or 4,
-            n_gpu_layers=0, # Установите > 0, если у вас есть GPU
-            verbose=False
-        )
-        current_model_id = new_model_id
-        print(f"✅ Модель {filename} успешно загружена и готова")
-    except Exception as e:
-        print(f"❌ Ошибка при загрузке модели: {e}")
-        raise HTTPException(status_code=500, detail=f"Failed to load model: {str(e)}")
 @app.post("/chat")
 async def chat(request: ChatRequest):
-    """Эндпоинт для чата, который сам переключает модели"""
-    # 1. Проверяем/загружаем модель
-    load_model_if_new(request.repo_id, request.filename)
     try:
-        # 2. Генерация ответа (базовый формат, подходящий для большинства моделей)
-        formatted_prompt = f"System: {request.system_prompt}\nUser: {request.prompt}\nAssistant:"
-        output = current_llm.create_completion(
-            prompt=formatted_prompt,
             max_tokens=request.max_tokens,
             temperature=request.temperature,
-            stop=["User:", "System:", "</s>", "<|endoftext|>"]
         )
         return {
             "response": output["choices"][0]["text"].strip(),
-            "model_id": current_model_id
         }
     except Exception as e:
-        print(f"Ошибка генерации: {e}")
-        raise HTTPException(status_code=500, detail="Generation error")
-@app.get("/health")
-async def health():
-    """Проверка состояния сервера"""
-    return {
-        "status": "online",
-        "current_model": current_model_id if current_model_id else "None"
-    }
 if __name__ == "__main__":
     import uvicorn
-    # Запуск на порту 7860 (стандарт для HF Spaces)
     uvicorn.run(app, host="0.0.0.0", port=7860)

 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
+app = FastAPI()
+# Разрешаем все подключения
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
+# Глобальная переменная для модели
+model = None
+current_id = ""
 class ChatRequest(BaseModel):
+    repo_id: str
+    filename: str
+    prompt: str
     system_prompt: str = "You are a helpful assistant."
     max_tokens: int = 512
     temperature: float = 0.7
+@app.get("/")
+async def health():
+    return {"status": "online", "info": "Server is running. Send POST to /chat"}
 @app.post("/chat")
 async def chat(request: ChatRequest):
+    global model, current_id
+    new_id = f"{request.repo_id}/{request.filename}"
     try:
+        # 1. Загружаем модель, если она еще не в памяти
+        if model is None or current_id != new_id:
+            print(f"--- Loading model: {new_id} ---")
+            if model is not None:
+                del model
+                gc.collect()
+            # Скачивание файла (использует кэш HF)
+            path = hf_hub_download(repo_id=request.repo_id, filename=request.filename)
+            model = Llama(
+                model_path=path,
+                n_ctx=2048, # Оптимально для 16ГБ RAM
+                n_threads=os.cpu_count() or 4,
+                n_gpu_layers=0, # Только CPU
+                verbose=False
+            )
+            current_id = new_id
+        # 2. Форматируем промпт и генерируем ответ
+        full_prompt = f"System: {request.system_prompt}\nUser: {request.prompt}\nAssistant:"
+        output = model.create_completion(
+            prompt=full_prompt,
             max_tokens=request.max_tokens,
             temperature=request.temperature,
+            stop=["User:", "System:", "</s>"]
         )
         return {
             "response": output["choices"][0]["text"].strip(),
+            "model": current_id
         }
     except Exception as e:
+        print(f"Error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
     import uvicorn
+    # Автоматический вывод ссылки для подключения
+    space_id = os.getenv("SPACE_ID")
+    if space_id:
+        # Прямая ссылка на API для внешних программ
+        host_link = f"https://{space_id.replace('/', '-').lower()}.hf.space/chat"
+        print("\n" + "="*50)
+        print(f"URL ДЛЯ ПОДКЛЮЧЕНИЯ:\n{host_link}")
+        print("="*50 + "\n")
     uvicorn.run(app, host="0.0.0.0", port=7860)