init
Browse files- app/main.py +29 -16
- app/templates/index.html +8 -0
- docker-compose.yml +6 -2
- test_llm.json +1 -0
app/main.py
CHANGED
|
@@ -136,9 +136,13 @@ def retrieve_documents(query: str, top_k: int = 3) -> List[Dict]:
|
|
| 136 |
# Extract documents
|
| 137 |
documents = []
|
| 138 |
for match in results['matches']:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
documents.append({
|
| 140 |
'pdf_name': match['metadata'].get('pdf_name', 'unknown.pdf'),
|
| 141 |
-
'page_number': match['metadata'].get('page_number', 0),
|
| 142 |
'content': match['metadata'].get('text', ''),
|
| 143 |
'score': match.get('score', 0.0)
|
| 144 |
})
|
|
@@ -174,7 +178,7 @@ Sual: {query}
|
|
| 174 |
|
| 175 |
Cavab verərkən:
|
| 176 |
1. Dəqiq faktlar yazın
|
| 177 |
-
2. Hər faktı mənbə ilə göstərin: (PDF: fayl_adı.pdf, Səhifə: X)
|
| 178 |
3. Kontekstdə olmayan məlumat əlavə etməyin"""
|
| 179 |
|
| 180 |
try:
|
|
@@ -228,7 +232,7 @@ async def health():
|
|
| 228 |
|
| 229 |
|
| 230 |
@app.post("/llm")
|
| 231 |
-
async def llm_endpoint(request: QuestionRequest | ChatRequest):
|
| 232 |
"""
|
| 233 |
LLM chatbot endpoint for SOCAR historical documents.
|
| 234 |
|
|
@@ -248,23 +252,32 @@ async def llm_endpoint(request: QuestionRequest | ChatRequest):
|
|
| 248 |
2. ChatRequest: {"messages": [{"role": "user", "content": "..."}], ...}
|
| 249 |
"""
|
| 250 |
try:
|
| 251 |
-
#
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
raise HTTPException(status_code=400, detail="No messages provided")
|
| 260 |
|
| 261 |
-
user_messages = [msg for msg in request.messages if msg.role == "user"]
|
| 262 |
if not user_messages:
|
| 263 |
raise HTTPException(status_code=400, detail="No user message found")
|
| 264 |
|
| 265 |
-
query = user_messages[-1].content
|
| 266 |
-
temperature = request.temperature
|
| 267 |
-
max_tokens = request.max_tokens
|
|
|
|
|
|
|
|
|
|
| 268 |
|
| 269 |
# Retrieve relevant documents
|
| 270 |
documents = retrieve_documents(query, top_k=3)
|
|
@@ -288,7 +301,7 @@ async def llm_endpoint(request: QuestionRequest | ChatRequest):
|
|
| 288 |
]
|
| 289 |
|
| 290 |
# Return appropriate response format
|
| 291 |
-
if isinstance(request, QuestionRequest):
|
| 292 |
return AnswerResponse(
|
| 293 |
answer=answer,
|
| 294 |
sources=sources,
|
|
|
|
| 136 |
# Extract documents
|
| 137 |
documents = []
|
| 138 |
for match in results['matches']:
|
| 139 |
+
# Ensure page_number is always an integer (Pinecone may return float)
|
| 140 |
+
page_num = match['metadata'].get('page_number', 0)
|
| 141 |
+
page_num = int(page_num) if isinstance(page_num, (int, float)) else 0
|
| 142 |
+
|
| 143 |
documents.append({
|
| 144 |
'pdf_name': match['metadata'].get('pdf_name', 'unknown.pdf'),
|
| 145 |
+
'page_number': page_num,
|
| 146 |
'content': match['metadata'].get('text', ''),
|
| 147 |
'score': match.get('score', 0.0)
|
| 148 |
})
|
|
|
|
| 178 |
|
| 179 |
Cavab verərkən:
|
| 180 |
1. Dəqiq faktlar yazın
|
| 181 |
+
2. Hər faktı mənbə ilə göstərin: (PDF: fayl_adı.pdf, Səhifə: X) - səhifə nömrəsini tam ədəd (integer) olaraq yazın, məsələn "Səhifə: 11" (11.0 yox)
|
| 182 |
3. Kontekstdə olmayan məlumat əlavə etməyin"""
|
| 183 |
|
| 184 |
try:
|
|
|
|
| 232 |
|
| 233 |
|
| 234 |
@app.post("/llm")
|
| 235 |
+
async def llm_endpoint(request: Request):
|
| 236 |
"""
|
| 237 |
LLM chatbot endpoint for SOCAR historical documents.
|
| 238 |
|
|
|
|
| 252 |
2. ChatRequest: {"messages": [{"role": "user", "content": "..."}], ...}
|
| 253 |
"""
|
| 254 |
try:
|
| 255 |
+
# Parse request body
|
| 256 |
+
body = await request.json()
|
| 257 |
+
|
| 258 |
+
# Determine request format and extract query
|
| 259 |
+
if "question" in body:
|
| 260 |
+
# QuestionRequest format
|
| 261 |
+
query = body.get("question")
|
| 262 |
+
temperature = body.get("temperature", 0.2)
|
| 263 |
+
max_tokens = body.get("max_tokens", 1000)
|
| 264 |
+
is_simple_format = True
|
| 265 |
+
elif "messages" in body:
|
| 266 |
+
# ChatRequest format
|
| 267 |
+
messages = body.get("messages", [])
|
| 268 |
+
if not messages:
|
| 269 |
raise HTTPException(status_code=400, detail="No messages provided")
|
| 270 |
|
| 271 |
+
user_messages = [msg for msg in messages if msg.get("role") == "user"]
|
| 272 |
if not user_messages:
|
| 273 |
raise HTTPException(status_code=400, detail="No user message found")
|
| 274 |
|
| 275 |
+
query = user_messages[-1].get("content")
|
| 276 |
+
temperature = body.get("temperature", 0.2)
|
| 277 |
+
max_tokens = body.get("max_tokens", 1000)
|
| 278 |
+
is_simple_format = False
|
| 279 |
+
else:
|
| 280 |
+
raise HTTPException(status_code=400, detail="Invalid request format. Expected 'question' or 'messages' field.")
|
| 281 |
|
| 282 |
# Retrieve relevant documents
|
| 283 |
documents = retrieve_documents(query, top_k=3)
|
|
|
|
| 301 |
]
|
| 302 |
|
| 303 |
# Return appropriate response format
|
| 304 |
+
if is_simple_format:
|
| 305 |
return AnswerResponse(
|
| 306 |
answer=answer,
|
| 307 |
sources=sources,
|
app/templates/index.html
CHANGED
|
@@ -4,6 +4,14 @@
|
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
<title>SOCAR Historical Documents AI System</title>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
<link rel="stylesheet" href="{{ url_for('static', path='/css/style.css') }}">
|
| 8 |
</head>
|
| 9 |
<body>
|
|
|
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
<title>SOCAR Historical Documents AI System</title>
|
| 7 |
+
|
| 8 |
+
<!-- Favicon -->
|
| 9 |
+
<link rel="icon" type="image/x-icon" href="{{ url_for('static', path='/favicon/favicon.ico') }}">
|
| 10 |
+
<link rel="apple-touch-icon" sizes="180x180" href="{{ url_for('static', path='/favicon/apple-touch-icon.png') }}">
|
| 11 |
+
<link rel="icon" type="image/png" sizes="192x192" href="{{ url_for('static', path='/favicon/icon-192.png') }}">
|
| 12 |
+
<link rel="icon" type="image/png" sizes="512x512" href="{{ url_for('static', path='/favicon/icon-512.png') }}">
|
| 13 |
+
|
| 14 |
+
<!-- Stylesheet -->
|
| 15 |
<link rel="stylesheet" href="{{ url_for('static', path='/css/style.css') }}">
|
| 16 |
</head>
|
| 17 |
<body>
|
docker-compose.yml
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
version: '3.8'
|
| 2 |
|
| 3 |
services:
|
| 4 |
-
|
| 5 |
build:
|
| 6 |
context: .
|
| 7 |
dockerfile: Dockerfile
|
| 8 |
-
container_name: socar-
|
| 9 |
ports:
|
| 10 |
- "8000:8000"
|
| 11 |
env_file:
|
|
@@ -24,6 +24,10 @@ services:
|
|
| 24 |
start_period: 40s
|
| 25 |
networks:
|
| 26 |
- socar-network
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
networks:
|
| 29 |
socar-network:
|
|
|
|
| 1 |
version: '3.8'
|
| 2 |
|
| 3 |
services:
|
| 4 |
+
socar-ai-system:
|
| 5 |
build:
|
| 6 |
context: .
|
| 7 |
dockerfile: Dockerfile
|
| 8 |
+
container_name: socar-ai-system
|
| 9 |
ports:
|
| 10 |
- "8000:8000"
|
| 11 |
env_file:
|
|
|
|
| 24 |
start_period: 40s
|
| 25 |
networks:
|
| 26 |
- socar-network
|
| 27 |
+
labels:
|
| 28 |
+
- "com.socar.description=SOCAR Historical Documents AI System"
|
| 29 |
+
- "com.socar.features=OCR,LLM,Frontend"
|
| 30 |
+
- "com.socar.version=1.0.0"
|
| 31 |
|
| 32 |
networks:
|
| 33 |
socar-network:
|
test_llm.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"question":"Neft hasilatı haqqında nə məlumat var?"}
|