| | import os |
| | import json |
| | from fastapi import FastAPI, HTTPException |
| | from fastapi.responses import StreamingResponse, FileResponse |
| | from fastapi.staticfiles import StaticFiles |
| | from pydantic import BaseModel |
| | from llama_cpp import Llama |
| | from huggingface_hub import hf_hub_download |
| | from tavily import TavilyClient |
| |
|
app = FastAPI(title="Qwen Turbo Search API")

# Tavily web-search credentials; search is disabled when the key is absent.
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

# GGUF model pulled from the Hugging Face Hub on startup.
REPO_ID = "bartowski/Qwen2.5-1.5B-Instruct-GGUF"
FILENAME = "Qwen2.5-1.5B-Instruct-Q6_K.gguf"

# Populated by startup_event(); both stay None until startup completes
# (endpoints must therefore handle the not-yet-loaded case).
llm = None
tavily_client = None
| |
|
| | |
@app.on_event("startup")
def startup_event():
    """Initialise the Tavily client and load the GGUF model into module globals.

    Best-effort: a failed model download/load is logged and leaves ``llm``
    as None rather than crashing the server.
    """
    global llm, tavily_client

    if TAVILY_API_KEY:
        tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
        print("✅ Tavily Search подключен")
    else:
        print("⚠️ Нет TAVILY_API_KEY. Поиск работать не будет.")

    print("🚀 Загрузка модели...")
    try:
        gguf_path = hf_hub_download(
            repo_id=REPO_ID,
            filename=FILENAME,
            cache_dir="./models",
        )
        llm = Llama(
            model_path=gguf_path,
            n_ctx=8192,
            n_threads=2,
            n_batch=1024,
            verbose=False,
        )
    except Exception as e:
        print(f"❌ Ошибка: {e}")
    else:
        print("✅ Модель готова!")
| |
|
| | |
| | |
# Serve the front-end assets from ./static.
app.mount("/static", StaticFiles(directory="static"), name="static")
| |
|
@app.get("/")
def read_root():
    """Serve the single-page front-end."""
    return FileResponse('static/index.html')
| |
|
| | |
def perform_search(query: str):
    """Run a Tavily web search and format the hits for prompt injection.

    Args:
        query: Free-text search query (the user's last message).

    Returns:
        ``(context_text, sources)`` where ``context_text`` is a numbered
        plain-text digest for the system prompt and ``sources`` is a list of
        ``{"id", "title", "url"}`` dicts. On a missing API key or any search
        failure, ``context_text`` explains the problem and ``sources`` is
        empty.
    """
    if not tavily_client:
        return "Нет ключа Tavily.", []
    print(f"🔎 Ищу: {query}")
    try:
        res = tavily_client.search(query=query, search_depth="advanced", max_results=5)
        parts = []
        sources = []
        # enumerate(start=1) yields the [1], [2]... citation ids the prompt
        # asks the model to use; .get() keeps a missing 'results' key from
        # being masked by the broad except below.
        for idx, r in enumerate(res.get('results', []), start=1):
            parts.append(f"ИСТОЧНИК [{idx}]: {r['title']}\nТЕКСТ: {r['content']}\n\n")
            sources.append({"id": idx, "title": r['title'], "url": r['url']})
        # join() instead of += avoids quadratic string building.
        return "".join(parts), sources
    except Exception as e:
        print(f"Err: {e}")
        return "Ошибка поиска.", []
| |
|
| | |
class Message(BaseModel):
    """One chat turn in OpenAI message format."""

    # Role string ("system"/"user"/"assistant"); forwarded to the model as-is.
    role: str
    content: str
| |
|
class ChatRequest(BaseModel):
    """Request body for POST /v1/chat/completions (OpenAI-compatible subset)."""

    messages: list[Message]
    temperature: float = 0.6
    max_tokens: int = 2048
    # SSE streaming flag (OpenAI-compatible field).
    stream: bool = True
    # When true, the last message is web-searched via Tavily and the hits
    # are injected as a system prompt before generation.
    use_search: bool = False
| |
|
@app.post("/v1/chat/completions")
def chat_completions(req: ChatRequest):
    """OpenAI-style chat completion backed by the local llama.cpp model.

    Optionally grounds the answer with Tavily search results (``use_search``)
    and appends a Markdown source list. Streams SSE chunks when
    ``req.stream`` is true (the default); otherwise returns one JSON
    completion object.

    Raises:
        HTTPException: 503 while the model is still loading, 400 when the
            request contains no messages.
    """
    if not llm:
        raise HTTPException(503, "Loading...")
    if not req.messages:
        # Without this guard, msgs[-1] below raises IndexError -> HTTP 500.
        raise HTTPException(400, "messages must not be empty")

    msgs = [{"role": m.role, "content": m.content} for m in req.messages]

    sources_md = ""
    if req.use_search:
        # The last message is treated as the user's question to search for.
        query = msgs[-1]['content']
        context, sources = perform_search(query)

        sys_prompt = (
            "Ты умный помощник. Отвечай на вопрос, используя ТОЛЬКО эти данные из интернета.\n"
            "Обязательно указывай источники [1], [2].\n"
            f"=== ДАННЫЕ ===\n{context}"
        )
        msgs.insert(0, {"role": "system", "content": sys_prompt})

        # Only build the source list when the search produced hits; previously
        # a failed search still appended an empty "Источники:" header.
        if sources:
            sources_md = "\n\n**Источники:**\n" + "\n".join(
                f"{s['id']}. [{s['title']}]({s['url']})" for s in sources
            )

    if not req.stream:
        # Honour stream=False: this flag used to be ignored and every
        # response came back as SSE regardless.
        result = llm.create_chat_completion(
            messages=msgs,
            temperature=req.temperature,
            max_tokens=req.max_tokens,
            stream=False,
        )
        if sources_md:
            result["choices"][0]["message"]["content"] += sources_md
        return result

    def iter_response():
        # Proxy the llama.cpp chunk stream as Server-Sent Events.
        stream = llm.create_chat_completion(
            messages=msgs,
            temperature=req.temperature,
            max_tokens=req.max_tokens,
            stream=True,
        )
        for chunk in stream:
            # ensure_ascii=False keeps the Cyrillic payload readable/compact.
            yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"

        # Emit the source list as one extra delta chunk before terminating.
        if sources_md:
            final_chunk = {
                "choices": [{"delta": {"content": sources_md}, "finish_reason": None}]
            }
            yield f"data: {json.dumps(final_chunk, ensure_ascii=False)}\n\n"

        yield "data: [DONE]\n\n"

    return StreamingResponse(iter_response(), media_type="text/event-stream")