LevinAleksey committed on
Commit
5187777
·
verified ·
1 Parent(s): afeb2a6

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +100 -64
main.py CHANGED
@@ -1,14 +1,13 @@
1
  import os
2
  import httpx
3
  import json
4
- from fastapi import FastAPI, Request, HTTPException
5
  from fastapi.responses import StreamingResponse, JSONResponse
6
  from fastapi.middleware.cors import CORSMiddleware
7
  from qdrant_client import QdrantClient
8
 
9
  app = FastAPI()
10
 
11
- # 1. CORS setup
12
  app.add_middleware(
13
  CORSMiddleware,
14
  allow_origins=["*"],
@@ -19,113 +18,150 @@ app.add_middleware(
19
 
20
  COLLECTION_NAME = "equipment_registry"
21
 
22
- # Embedding helper function
23
async def get_openrouter_embedding(text: str, api_key: str) -> list:
    """Fetch an embedding vector for ``text`` from the OpenRouter embeddings API.

    Args:
        text: The text to embed.
        api_key: OpenRouter API key, sent as a Bearer token.

    Returns:
        The ``embedding`` field of the first item in the response ``data`` list.

    Raises:
        RuntimeError: If the API answers with a non-200 status, or the response
            body is not JSON of the shape ``{"data": [{"embedding": ...}]}``.
            (``RuntimeError`` is an ``Exception``, so callers that catch
            ``Exception`` keep working.)
    """
    async with httpx.AsyncClient() as client:
        payload = {
            "model": "openai/text-embedding-3-small",
            "input": text,
            "encoding_format": "float",
        }
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }
        response = await client.post(
            "https://openrouter.ai/api/v1/embeddings",
            headers=headers,
            json=payload,
            timeout=30.0,
        )

    if response.status_code != 200:
        raise RuntimeError(f"Embedding Error: {response.text}")

    # A 200 response can still carry an unexpected body; surface that as the
    # same kind of error instead of a bare KeyError/IndexError.
    try:
        return response.json()["data"][0]["embedding"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        raise RuntimeError(
            f"Embedding Error: unexpected response body: {response.text}"
        ) from exc
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  @app.post("/api/chat")
37
  async def chat(request: Request):
38
- # ПРАВИЛЬНЫЕ ИМЕНА ИЗ ТВОИХ SECRETS
39
  OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
40
  QDRANT_URL = os.getenv("QDRANT_URL")
41
  QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
42
 
43
- if not OPENROUTER_API_KEY or not QDRANT_URL or not QDRANT_API_KEY:
44
- return JSONResponse(content={"error": "Missing environment variables on HF Space"}, status_code=500)
 
 
 
45
 
46
  try:
47
  body = await request.json()
48
  messages = body.get("messages", [])
 
49
  if not messages:
50
  return JSONResponse(content={"error": "No messages"}, status_code=400)
51
 
52
  user_query = messages[-1].get("content", "")
53
 
54
- # 1. Поиск в Qdrant
55
- vector = await get_openrouter_embedding(user_query, OPENROUTER_API_KEY)
56
 
57
- q_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
58
- search_results = q_client.search(
59
- collection_name=COLLECTION_NAME,
60
- query_vector=vector,
61
- limit=5
62
- )
63
 
64
- context = "\n\n".join([res.payload.get("search_text", "") for res in search_results if res.payload])
 
 
 
65
 
66
- system_prompt = (
67
- "Ты — инженерный ИИ-ассистент по реестру научного оборудования. "
68
- "Используй предоставленный контекст для точного ответа. "
69
- "Если данных нет, вежливо скажи об этом.\n\n"
70
- f"КОНТЕКСТ:\n{context}"
71
- )
72
 
73
- # 2. Стриминг ответа (DeepSeek-V3 через OpenRouter)
74
- async def generate():
75
- payload = {
76
- "model": "deepseek/deepseek-chat",
77
- "messages": [{"role": "system", "content": system_prompt}] + messages,
78
- "stream": True
79
- }
80
- headers = {
81
- "Authorization": f"Bearer {OPENROUTER_API_KEY}",
82
- "Content-Type": "application/json",
83
- "HTTP-Referer": "https://huggingface.co/",
84
- "X-OpenRouter-Title": "Inventory Assistant"
85
- }
86
 
 
 
87
  async with httpx.AsyncClient(timeout=120.0) as client:
88
- async with client.stream("POST", "https://openrouter.ai/api/v1/chat/completions", headers=headers, json=payload) as response:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  if response.status_code != 200:
90
- error_text = await response.aread()
91
- yield f"OpenRouter Error: {error_text.decode()}"
92
  return
93
 
94
  async for line in response.aiter_lines():
95
  if not line or not line.startswith("data: "):
96
  continue
97
-
98
- data_str = line[6:].strip()
99
- if data_str == "[DONE]":
100
  break
101
-
102
  try:
103
- chunk = json.loads(data_str)
104
- if not chunk.get('choices'):
105
- continue
106
-
107
- content = chunk['choices'][0].get('delta', {}).get('content', '')
 
108
  if content:
109
- # Отдаем чистый текст для useChat
110
  yield content
111
- except:
112
  continue
113
 
114
  return StreamingResponse(
115
- generate(),
116
- media_type="text/plain",
117
  headers={
118
  "Cache-Control": "no-cache",
119
- "X-Accel-Buffering": "no" # Чтобы HF не тормозил поток
 
120
  }
121
  )
122
 
123
  except Exception as e:
124
- print(f"Chat Error: {str(e)}")
125
- return JSONResponse(content={"detail": str(e)}, status_code=500)
 
126
 
127
  @app.get("/")
128
  async def health():
129
- # Проверка, видит ли сервер ключи
130
- check = "OK" if os.getenv("OPENROUTER_API_KEY") else "MISSING"
131
- return {"status": "running", "api_key": check}
 
 
 
 
 
 
 
 
1
  import os
2
  import httpx
3
  import json
4
+ from fastapi import FastAPI, Request
5
  from fastapi.responses import StreamingResponse, JSONResponse
6
  from fastapi.middleware.cors import CORSMiddleware
7
  from qdrant_client import QdrantClient
8
 
9
  app = FastAPI()
10
 
 
11
  app.add_middleware(
12
  CORSMiddleware,
13
  allow_origins=["*"],
 
18
 
19
  COLLECTION_NAME = "equipment_registry"
20
 
21
+
22
async def get_embedding(text: str, api_key: str) -> list[float]:
    """Fetch an embedding vector for ``text`` from the OpenRouter embeddings API.

    Args:
        text: The text to embed.
        api_key: OpenRouter API key, sent as a Bearer token.

    Returns:
        The ``embedding`` field of the first item in the response ``data`` list.

    Raises:
        RuntimeError: If the API answers with a non-200 status, or the response
            body is not JSON of the shape ``{"data": [{"embedding": ...}]}``.
            (``RuntimeError`` is an ``Exception``, so callers that catch
            ``Exception`` keep working.)
    """
    async with httpx.AsyncClient() as client:
        response = await client.post(
            "https://openrouter.ai/api/v1/embeddings",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": "openai/text-embedding-3-small",
                "input": text,
                "encoding_format": "float",
            },
            timeout=30.0,
        )

    if response.status_code != 200:
        raise RuntimeError(f"Embedding Error: {response.text}")

    # A 200 response can still carry an unexpected body; surface that as the
    # same kind of error instead of a bare KeyError/IndexError.
    try:
        return response.json()["data"][0]["embedding"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        raise RuntimeError(
            f"Embedding Error: unexpected response body: {response.text}"
        ) from exc
41
 
42
+
43
def search_qdrant(query_vector: list[float], url: str, api_key: str) -> str:
    """Search Qdrant for context matching ``query_vector``.

    Queries ``COLLECTION_NAME`` for the 5 nearest points and joins the
    ``search_text`` payload field of each hit with blank lines.

    This function is synchronous and performs network I/O; call it from a
    worker thread when used inside async code.

    Args:
        query_vector: Embedding of the user query.
        url: Qdrant endpoint URL.
        api_key: Qdrant API key.

    Returns:
        The non-empty ``search_text`` values joined by ``"\\n\\n"``; an empty
        string when no hit carries usable text.
    """
    client = QdrantClient(url=url, api_key=api_key)
    try:
        # NOTE(review): newer qdrant-client releases deprecate `search` in
        # favour of `query_points` — confirm against the pinned version.
        results = client.search(
            collection_name=COLLECTION_NAME,
            query_vector=query_vector,
            limit=5,
        )
    finally:
        # A client is created per call, so release its connections here.
        client.close()

    texts = (res.payload.get("search_text") for res in results if res.payload)
    # Skip missing/None/empty values: they would break str.join or add
    # empty separators to the prompt context.
    return "\n\n".join(text for text in texts if isinstance(text, str) and text)
57
+
58
+
59
  @app.post("/api/chat")
60
  async def chat(request: Request):
61
+ # Переменные окружения
62
  OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
63
  QDRANT_URL = os.getenv("QDRANT_URL")
64
  QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
65
 
66
+ if not all([OPENROUTER_API_KEY, QDRANT_URL, QDRANT_API_KEY]):
67
+ return JSONResponse(
68
+ content={"error": "Missing environment variables"},
69
+ status_code=500
70
+ )
71
 
72
  try:
73
  body = await request.json()
74
  messages = body.get("messages", [])
75
+
76
  if not messages:
77
  return JSONResponse(content={"error": "No messages"}, status_code=400)
78
 
79
  user_query = messages[-1].get("content", "")
80
 
81
+ # 1. Получаем эмбеддинг запроса
82
+ query_vector = await get_embedding(user_query, OPENROUTER_API_KEY)
83
 
84
+ # 2. Ищем контекст в Qdrant
85
+ context = search_qdrant(query_vector, QDRANT_URL, QDRANT_API_KEY)
 
 
 
 
86
 
87
+ # 3. Системный промпт с контекстом
88
+ system_prompt = f"""Ты — инженерный ИИ-ассистент по реестру научного оборудования.
89
+ Используй предоставленный контекст для точного ответа.
90
+ Если данных нет в контексте, вежливо скажи об этом.
91
 
92
+ КОНТЕКСТ:
93
+ {context}"""
 
 
 
 
94
 
95
+ # 4. Формируем сообщения для LLM
96
+ llm_messages = [{"role": "system", "content": system_prompt}] + messages
 
 
 
 
 
 
 
 
 
 
 
97
 
98
+ # 5. Стриминг ответа
99
+ async def generate():
100
  async with httpx.AsyncClient(timeout=120.0) as client:
101
+ async with client.stream(
102
+ "POST",
103
+ "https://openrouter.ai/api/v1/chat/completions",
104
+ headers={
105
+ "Authorization": f"Bearer {OPENROUTER_API_KEY}",
106
+ "Content-Type": "application/json",
107
+ "HTTP-Referer": "https://huggingface.co/",
108
+ "X-Title": "Equipment Assistant"
109
+ },
110
+ json={
111
+ "model": "deepseek/deepseek-chat",
112
+ "messages": llm_messages,
113
+ "stream": True
114
+ }
115
+ ) as response:
116
  if response.status_code != 200:
117
+ error = await response.aread()
118
+ yield f"Error: {error.decode()}"
119
  return
120
 
121
  async for line in response.aiter_lines():
122
  if not line or not line.startswith("data: "):
123
  continue
124
+
125
+ data = line[6:].strip()
126
+ if data == "[DONE]":
127
  break
128
+
129
  try:
130
+ chunk = json.loads(data)
131
+ content = (
132
+ chunk.get("choices", [{}])[0]
133
+ .get("delta", {})
134
+ .get("content", "")
135
+ )
136
  if content:
 
137
  yield content
138
+ except json.JSONDecodeError:
139
  continue
140
 
141
  return StreamingResponse(
142
+ generate(),
143
+ media_type="text/event-stream",
144
  headers={
145
  "Cache-Control": "no-cache",
146
+ "Connection": "keep-alive",
147
+ "X-Accel-Buffering": "no"
148
  }
149
  )
150
 
151
  except Exception as e:
152
+ print(f"Chat Error: {e}")
153
+ return JSONResponse(content={"error": str(e)}, status_code=500)
154
+
155
 
156
  @app.get("/")
157
  async def health():
158
+ """Проверка здоровья сервиса"""
159
+ has_keys = all([
160
+ os.getenv("OPENROUTER_API_KEY"),
161
+ os.getenv("QDRANT_URL"),
162
+ os.getenv("QDRANT_API_KEY")
163
+ ])
164
+ return {
165
+ "status": "running",
166
+ "keys_configured": has_keys
167
+ }