IsmatS committed on
Commit
470cbeb
·
1 Parent(s): e716472
Files changed (4) hide show
  1. app/main.py +29 -16
  2. app/templates/index.html +8 -0
  3. docker-compose.yml +6 -2
  4. test_llm.json +1 -0
app/main.py CHANGED
@@ -136,9 +136,13 @@ def retrieve_documents(query: str, top_k: int = 3) -> List[Dict]:
136
  # Extract documents
137
  documents = []
138
  for match in results['matches']:
 
 
 
 
139
  documents.append({
140
  'pdf_name': match['metadata'].get('pdf_name', 'unknown.pdf'),
141
- 'page_number': match['metadata'].get('page_number', 0),
142
  'content': match['metadata'].get('text', ''),
143
  'score': match.get('score', 0.0)
144
  })
@@ -174,7 +178,7 @@ Sual: {query}
174
 
175
  Cavab verərkən:
176
  1. Dəqiq faktlar yazın
177
- 2. Hər faktı mənbə ilə göstərin: (PDF: fayl_adı.pdf, Səhifə: X)
178
  3. Kontekstdə olmayan məlumat əlavə etməyin"""
179
 
180
  try:
@@ -228,7 +232,7 @@ async def health():
228
 
229
 
230
  @app.post("/llm")
231
- async def llm_endpoint(request: QuestionRequest | ChatRequest):
232
  """
233
  LLM chatbot endpoint for SOCAR historical documents.
234
 
@@ -248,23 +252,32 @@ async def llm_endpoint(request: QuestionRequest | ChatRequest):
248
  2. ChatRequest: {"messages": [{"role": "user", "content": "..."}], ...}
249
  """
250
  try:
251
- # Handle both request formats
252
- if isinstance(request, QuestionRequest):
253
- query = request.question
254
- temperature = request.temperature
255
- max_tokens = request.max_tokens
256
- else: # ChatRequest
257
- # Extract the user's question (last message)
258
- if not request.messages:
 
 
 
 
 
 
259
  raise HTTPException(status_code=400, detail="No messages provided")
260
 
261
- user_messages = [msg for msg in request.messages if msg.role == "user"]
262
  if not user_messages:
263
  raise HTTPException(status_code=400, detail="No user message found")
264
 
265
- query = user_messages[-1].content
266
- temperature = request.temperature
267
- max_tokens = request.max_tokens
 
 
 
268
 
269
  # Retrieve relevant documents
270
  documents = retrieve_documents(query, top_k=3)
@@ -288,7 +301,7 @@ async def llm_endpoint(request: QuestionRequest | ChatRequest):
288
  ]
289
 
290
  # Return appropriate response format
291
- if isinstance(request, QuestionRequest):
292
  return AnswerResponse(
293
  answer=answer,
294
  sources=sources,
 
136
  # Extract documents
137
  documents = []
138
  for match in results['matches']:
139
+ # Ensure page_number is always an integer (Pinecone may return float)
140
+ page_num = match['metadata'].get('page_number', 0)
141
+ page_num = int(page_num) if isinstance(page_num, (int, float)) else 0
142
+
143
  documents.append({
144
  'pdf_name': match['metadata'].get('pdf_name', 'unknown.pdf'),
145
+ 'page_number': page_num,
146
  'content': match['metadata'].get('text', ''),
147
  'score': match.get('score', 0.0)
148
  })
 
178
 
179
  Cavab verərkən:
180
  1. Dəqiq faktlar yazın
181
+ 2. Hər faktı mənbə ilə göstərin: (PDF: fayl_adı.pdf, Səhifə: X) - səhifə nömrəsini tam ədəd (integer) olaraq yazın, məsələn "Səhifə: 11" (11.0 yox)
182
  3. Kontekstdə olmayan məlumat əlavə etməyin"""
183
 
184
  try:
 
232
 
233
 
234
  @app.post("/llm")
235
+ async def llm_endpoint(request: Request):
236
  """
237
  LLM chatbot endpoint for SOCAR historical documents.
238
 
 
252
  2. ChatRequest: {"messages": [{"role": "user", "content": "..."}], ...}
253
  """
254
  try:
255
+ # Parse request body
256
+ body = await request.json()
257
+
258
+ # Determine request format and extract query
259
+ if "question" in body:
260
+ # QuestionRequest format
261
+ query = body.get("question")
262
+ temperature = body.get("temperature", 0.2)
263
+ max_tokens = body.get("max_tokens", 1000)
264
+ is_simple_format = True
265
+ elif "messages" in body:
266
+ # ChatRequest format
267
+ messages = body.get("messages", [])
268
+ if not messages:
269
  raise HTTPException(status_code=400, detail="No messages provided")
270
 
271
+ user_messages = [msg for msg in messages if msg.get("role") == "user"]
272
  if not user_messages:
273
  raise HTTPException(status_code=400, detail="No user message found")
274
 
275
+ query = user_messages[-1].get("content")
276
+ temperature = body.get("temperature", 0.2)
277
+ max_tokens = body.get("max_tokens", 1000)
278
+ is_simple_format = False
279
+ else:
280
+ raise HTTPException(status_code=400, detail="Invalid request format. Expected 'question' or 'messages' field.")
281
 
282
  # Retrieve relevant documents
283
  documents = retrieve_documents(query, top_k=3)
 
301
  ]
302
 
303
  # Return appropriate response format
304
+ if is_simple_format:
305
  return AnswerResponse(
306
  answer=answer,
307
  sources=sources,
app/templates/index.html CHANGED
@@ -4,6 +4,14 @@
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
  <title>SOCAR Historical Documents AI System</title>
 
 
 
 
 
 
 
 
7
  <link rel="stylesheet" href="{{ url_for('static', path='/css/style.css') }}">
8
  </head>
9
  <body>
 
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
  <title>SOCAR Historical Documents AI System</title>
7
+
8
+ <!-- Favicon -->
9
+ <link rel="icon" type="image/x-icon" href="{{ url_for('static', path='/favicon/favicon.ico') }}">
10
+ <link rel="apple-touch-icon" sizes="180x180" href="{{ url_for('static', path='/favicon/apple-touch-icon.png') }}">
11
+ <link rel="icon" type="image/png" sizes="192x192" href="{{ url_for('static', path='/favicon/icon-192.png') }}">
12
+ <link rel="icon" type="image/png" sizes="512x512" href="{{ url_for('static', path='/favicon/icon-512.png') }}">
13
+
14
+ <!-- Stylesheet -->
15
  <link rel="stylesheet" href="{{ url_for('static', path='/css/style.css') }}">
16
  </head>
17
  <body>
docker-compose.yml CHANGED
@@ -1,11 +1,11 @@
1
  version: '3.8'
2
 
3
  services:
4
- llm-api:
5
  build:
6
  context: .
7
  dockerfile: Dockerfile
8
- container_name: socar-llm-api
9
  ports:
10
  - "8000:8000"
11
  env_file:
@@ -24,6 +24,10 @@ services:
24
  start_period: 40s
25
  networks:
26
  - socar-network
 
 
 
 
27
 
28
  networks:
29
  socar-network:
 
1
  version: '3.8'
2
 
3
  services:
4
+ socar-ai-system:
5
  build:
6
  context: .
7
  dockerfile: Dockerfile
8
+ container_name: socar-ai-system
9
  ports:
10
  - "8000:8000"
11
  env_file:
 
24
  start_period: 40s
25
  networks:
26
  - socar-network
27
+ labels:
28
+ - "com.socar.description=SOCAR Historical Documents AI System"
29
+ - "com.socar.features=OCR,LLM,Frontend"
30
+ - "com.socar.version=1.0.0"
31
 
32
  networks:
33
  socar-network:
test_llm.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"question":"Neft hasilatı haqqında nə məlumat var?"}