IsmatS committed on
Commit
e716472
·
1 Parent(s): 3b014b5
Files changed (2) hide show
  1. Dockerfile +4 -4
  2. app/main.py +65 -38
Dockerfile CHANGED
@@ -1,8 +1,9 @@
1
- # SOCAR Hackathon - LLM Endpoint Dockerfile
2
  # Multi-stage build for optimized image size
 
3
 
4
  # Stage 1: Builder
5
- FROM python:3.10-slim as builder
6
 
7
  WORKDIR /app
8
 
@@ -16,7 +17,7 @@ COPY app/requirements.txt .
16
  RUN pip install --no-cache-dir --user -r requirements.txt
17
 
18
  # Stage 2: Runtime
19
- FROM python:3.10-slim
20
 
21
  WORKDIR /app
22
 
@@ -30,7 +31,6 @@ COPY --from=builder /root/.local /root/.local
30
 
31
  # Copy application code
32
  COPY app/ ./app/
33
- COPY .env.example .env
34
 
35
  # Add local bin to PATH
36
  ENV PATH=/root/.local/bin:$PATH
 
1
+ # SOCAR Hackathon - Complete AI System Dockerfile
2
  # Multi-stage build for optimized image size
3
+ # Includes OCR, LLM, and frontend capabilities
4
 
5
  # Stage 1: Builder
6
+ FROM python:3.11-slim as builder
7
 
8
  WORKDIR /app
9
 
 
17
  RUN pip install --no-cache-dir --user -r requirements.txt
18
 
19
  # Stage 2: Runtime
20
+ FROM python:3.11-slim
21
 
22
  WORKDIR /app
23
 
 
31
 
32
  # Copy application code
33
  COPY app/ ./app/
 
34
 
35
  # Add local bin to PATH
36
  ENV PATH=/root/.local/bin:$PATH
app/main.py CHANGED
@@ -15,8 +15,10 @@ from io import BytesIO
15
 
16
  import fitz # PyMuPDF
17
  from PIL import Image
18
- from fastapi import FastAPI, HTTPException, File, UploadFile
19
  from fastapi.middleware.cors import CORSMiddleware
 
 
20
  from pydantic import BaseModel
21
  from dotenv import load_dotenv
22
  from openai import AzureOpenAI
@@ -28,8 +30,8 @@ load_dotenv()
28
 
29
  # Initialize FastAPI app
30
  app = FastAPI(
31
- title="SOCAR Historical Documents Chatbot",
32
- description="RAG-based chatbot for SOCAR oil & gas historical documents",
33
  version="1.0.0"
34
  )
35
 
@@ -42,6 +44,10 @@ app.add_middleware(
42
  allow_headers=["*"],
43
  )
44
 
 
 
 
 
45
  # Initialize clients (lazy loading for faster startup)
46
  azure_client = None
47
  pinecone_index = None
@@ -97,6 +103,18 @@ class ChatResponse(BaseModel):
97
  model: str
98
 
99
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  def retrieve_documents(query: str, top_k: int = 3) -> List[Dict]:
101
  """
102
  Retrieve relevant documents from Pinecone vector database.
@@ -180,20 +198,9 @@ Cavab verərkən:
180
 
181
 
182
  @app.get("/")
183
- async def root():
184
- """Health check endpoint"""
185
- return {
186
- "status": "healthy",
187
- "service": "SOCAR LLM Chatbot",
188
- "version": "1.0.0",
189
- "model": "Llama-4-Maverick-17B (open-source)",
190
- "configuration": {
191
- "embedding": "BAAI/bge-large-en-v1.5",
192
- "retrieval": "top-3 vanilla",
193
- "prompt": "citation_focused",
194
- "benchmark_score": "55.67%"
195
- }
196
- }
197
 
198
 
199
  @app.get("/health")
@@ -220,8 +227,8 @@ async def health():
220
  }
221
 
222
 
223
- @app.post("/llm", response_model=ChatResponse)
224
- async def llm_endpoint(request: ChatRequest):
225
  """
226
  LLM chatbot endpoint for SOCAR historical documents.
227
 
@@ -235,17 +242,29 @@ async def llm_endpoint(request: ChatRequest):
235
  - Response time: ~3.6s
236
  - LLM Judge Score: 55.67%
237
  - Citation Score: 73.33%
 
 
 
 
238
  """
239
  try:
240
- # Extract the user's question (last message)
241
- if not request.messages:
242
- raise HTTPException(status_code=400, detail="No messages provided")
243
-
244
- user_messages = [msg for msg in request.messages if msg.role == "user"]
245
- if not user_messages:
246
- raise HTTPException(status_code=400, detail="No user message found")
247
-
248
- query = user_messages[-1].content
 
 
 
 
 
 
 
 
249
 
250
  # Retrieve relevant documents
251
  documents = retrieve_documents(query, top_k=3)
@@ -254,26 +273,34 @@ async def llm_endpoint(request: ChatRequest):
254
  answer, response_time = generate_answer(
255
  query=query,
256
  documents=documents,
257
- temperature=request.temperature,
258
- max_tokens=request.max_tokens
259
  )
260
 
261
- # Format sources
262
  sources = [
263
  {
264
  "pdf_name": doc['pdf_name'],
265
- "page_number": str(doc['page_number']),
266
  "relevance_score": f"{doc['score']:.3f}"
267
  }
268
  for doc in documents
269
  ]
270
 
271
- return ChatResponse(
272
- response=answer,
273
- sources=sources,
274
- response_time=round(response_time, 2),
275
- model="Llama-4-Maverick-17B-128E-Instruct-FP8"
276
- )
 
 
 
 
 
 
 
 
277
 
278
  except HTTPException:
279
  raise
 
15
 
16
  import fitz # PyMuPDF
17
  from PIL import Image
18
+ from fastapi import FastAPI, HTTPException, File, UploadFile, Request
19
  from fastapi.middleware.cors import CORSMiddleware
20
+ from fastapi.staticfiles import StaticFiles
21
+ from fastapi.templating import Jinja2Templates
22
  from pydantic import BaseModel
23
  from dotenv import load_dotenv
24
  from openai import AzureOpenAI
 
30
 
31
  # Initialize FastAPI app
32
  app = FastAPI(
33
+ title="SOCAR Historical Documents AI System",
34
+ description="RAG-based chatbot for SOCAR oil & gas historical documents with OCR capabilities",
35
  version="1.0.0"
36
  )
37
 
 
44
  allow_headers=["*"],
45
  )
46
 
47
+ # Mount static files and templates
48
+ app.mount("/static", StaticFiles(directory="app/static"), name="static")
49
+ templates = Jinja2Templates(directory="app/templates")
50
+
51
  # Initialize clients (lazy loading for faster startup)
52
  azure_client = None
53
  pinecone_index = None
 
103
  model: str
104
 
105
 
106
+ class QuestionRequest(BaseModel):
107
+ question: str
108
+ temperature: float = 0.2
109
+ max_tokens: int = 1000
110
+
111
+
112
+ class AnswerResponse(BaseModel):
113
+ answer: str
114
+ sources: List[Dict]
115
+ response_time: float
116
+
117
+
118
  def retrieve_documents(query: str, top_k: int = 3) -> List[Dict]:
119
  """
120
  Retrieve relevant documents from Pinecone vector database.
 
198
 
199
 
200
  @app.get("/")
201
+ async def root(request: Request):
202
+ """Serve the frontend web application"""
203
+ return templates.TemplateResponse("index.html", {"request": request})
 
 
 
 
 
 
 
 
 
 
 
204
 
205
 
206
  @app.get("/health")
 
227
  }
228
 
229
 
230
+ @app.post("/llm")
231
+ async def llm_endpoint(request: QuestionRequest | ChatRequest):
232
  """
233
  LLM chatbot endpoint for SOCAR historical documents.
234
 
 
242
  - Response time: ~3.6s
243
  - LLM Judge Score: 55.67%
244
  - Citation Score: 73.33%
245
+
246
+ Accepts two formats:
247
+ 1. QuestionRequest: {"question": "...", "temperature": 0.2, "max_tokens": 1000}
248
+ 2. ChatRequest: {"messages": [{"role": "user", "content": "..."}], ...}
249
  """
250
  try:
251
+ # Handle both request formats
252
+ if isinstance(request, QuestionRequest):
253
+ query = request.question
254
+ temperature = request.temperature
255
+ max_tokens = request.max_tokens
256
+ else: # ChatRequest
257
+ # Extract the user's question (last message)
258
+ if not request.messages:
259
+ raise HTTPException(status_code=400, detail="No messages provided")
260
+
261
+ user_messages = [msg for msg in request.messages if msg.role == "user"]
262
+ if not user_messages:
263
+ raise HTTPException(status_code=400, detail="No user message found")
264
+
265
+ query = user_messages[-1].content
266
+ temperature = request.temperature
267
+ max_tokens = request.max_tokens
268
 
269
  # Retrieve relevant documents
270
  documents = retrieve_documents(query, top_k=3)
 
273
  answer, response_time = generate_answer(
274
  query=query,
275
  documents=documents,
276
+ temperature=temperature,
277
+ max_tokens=max_tokens
278
  )
279
 
280
+ # Format sources for response
281
  sources = [
282
  {
283
  "pdf_name": doc['pdf_name'],
284
+ "page_number": doc['page_number'],
285
  "relevance_score": f"{doc['score']:.3f}"
286
  }
287
  for doc in documents
288
  ]
289
 
290
+ # Return appropriate response format
291
+ if isinstance(request, QuestionRequest):
292
+ return AnswerResponse(
293
+ answer=answer,
294
+ sources=sources,
295
+ response_time=round(response_time, 2)
296
+ )
297
+ else:
298
+ return ChatResponse(
299
+ response=answer,
300
+ sources=[{k: str(v) for k, v in s.items()} for s in sources],
301
+ response_time=round(response_time, 2),
302
+ model="Llama-4-Maverick-17B-128E-Instruct-FP8"
303
+ )
304
 
305
  except HTTPException:
306
  raise