avakanski committed
Commit 85dbdef · verified · 1 Parent(s): 40f6083

Upload 4 files

Files changed (4)
  1. Dockerfile +15 -17
  2. app.py +71 -138
  3. query_index.py +277 -0
  4. requirements.txt +13 -23
Dockerfile CHANGED
@@ -1,42 +1,40 @@
-# Use NVIDIA CUDA base image for GPU support
-FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
+# Use lightweight Python base image
+FROM python:3.10-slim
 
 # Set working directory
 WORKDIR /app
 
 # Set environment variables
 ENV PYTHONUNBUFFERED=1 \
-    DEBIAN_FRONTEND=noninteractive \
     PYTHONDONTWRITEBYTECODE=1 \
-    PIP_NO_CACHE_DIR=1 \
-    CUDA_HOME=/usr/local/cuda
+    PIP_NO_CACHE_DIR=1
 
-# Install system dependencies
+# Install system dependencies (minimized)
 RUN apt-get update && apt-get install -y \
-    python3.10 \
-    python3-pip \
     git \
-    wget \
-    curl \
    && rm -rf /var/lib/apt/lists/*
 
 # Upgrade pip
-RUN pip3 install --upgrade pip setuptools wheel
+RUN pip install --upgrade pip
+
+# Create a non-root user
+RUN useradd -m -u 1000 user
 
 # Copy requirements file
 COPY requirements.txt .
 
 # Install Python dependencies
-RUN pip3 install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy application files
-COPY . .
+COPY --chown=user . .
 
-# Create directories for data and index
-RUN mkdir -p /app/data /app/rag_index /app/models
+# Create directories for data and index ensuring user permissions
+RUN mkdir -p /app/data /app/rag_index && \
+    chown -R user:user /app/data /app/rag_index
 
-# Set permissions
-RUN chmod -R 755 /app
+# Switch to non-root user
+USER user
 
 # Expose port for FastAPI
 EXPOSE 7860
app.py CHANGED
@@ -5,17 +5,25 @@ US Army Medical Research Papers Q&A
 
 import os
 import logging
+import shutil
 from typing import List, Dict, Optional
 from contextlib import asynccontextmanager
 
 from fastapi import FastAPI, HTTPException, UploadFile, File, BackgroundTasks
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse, FileResponse
+from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel, Field
 
-from rag_pipeline import RAGConfig, USArmyRAG, build_rag_index, load_existing_index, load_documents_from_jsonl
-import torch
-import gc
+# Import from query_index as requested (self-contained logic)
+from query_index import (
+    RAGConfig,
+    USArmyRAG,
+    build_rag_index,
+    load_existing_index,
+    load_documents_from_jsonl,
+    load_documents_from_text_files
+)
 
 # Configure logging
 logging.basicConfig(
@@ -37,26 +45,19 @@ async def lifespan(app: FastAPI):
     logger.info("Starting RAG system initialization...")
 
     try:
-        # Check GPU availability
-        if torch.cuda.is_available():
-            logger.info(f"GPU detected: {torch.cuda.get_device_name(0)}")
-            logger.info(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
-        else:
-            logger.warning("No GPU detected. Running on CPU (slower performance)")
-
         # Load or build index
         if os.path.exists(config.INDEX_DIR):
             logger.info("Loading existing RAG index...")
-            index, llm = load_existing_index(config)
+            try:
+                index, llm = load_existing_index(config)
+                rag_system = USArmyRAG(index, llm, config)
+                logger.info("RAG system initialized successfully!")
+            except Exception as e:
+                logger.error(f"Failed to load existing index: {e}")
+                rag_system = None
         else:
             logger.warning("No existing index found. You need to upload documents first.")
-            index, llm = None, None
-
-        if index and llm:
-            rag_system = USArmyRAG(index, llm, config)
-            logger.info("RAG system initialized successfully!")
-        else:
-            logger.info("RAG system will be initialized after document upload")
+            rag_system = None
 
     except Exception as e:
         logger.error(f"Error during initialization: {str(e)}")
@@ -66,12 +67,7 @@ async def lifespan(app: FastAPI):
 
     # Cleanup
     logger.info("Shutting down RAG system...")
-    if rag_system:
-        rag_system.cleanup()
-    gc.collect()
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-    logger.info("Cleanup complete")
+    rag_system = None
 
 # Create FastAPI app
 app = FastAPI(
@@ -90,6 +86,9 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
+# Mount static files
+app.mount("/static", StaticFiles(directory="static"), name="static")
+
 # Pydantic models
 class QueryRequest(BaseModel):
     question: str = Field(..., min_length=1, max_length=1000, description="Question to ask the RAG system")
@@ -107,7 +106,6 @@ class QueryResponse(BaseModel):
 
 class HealthResponse(BaseModel):
     status: str
-    gpu_available: bool
     rag_initialized: bool
     index_exists: bool
 
@@ -118,26 +116,14 @@ class BuildIndexRequest(BaseModel):
 
 @app.get("/", tags=["Root"])
 async def root():
-    """Root endpoint with API information"""
-    return {
-        "name": "US Army Medical Research RAG API",
-        "version": "1.0.0",
-        "status": "running",
-        "endpoints": {
-            "health": "/health",
-            "query": "/query",
-            "upload": "/upload",
-            "build_index": "/build-index",
-            "docs": "/docs"
-        }
-    }
+    """Serve the frontend application"""
+    return FileResponse('static/index.html')
 
 @app.get("/health", response_model=HealthResponse, tags=["Health"])
 async def health_check():
     """Health check endpoint"""
     return HealthResponse(
-        status="healthy" if rag_system else "not_initialized",
-        gpu_available=torch.cuda.is_available(),
+        status="healthy",
         rag_initialized=rag_system is not None,
         index_exists=os.path.exists(config.INDEX_DIR)
     )
@@ -146,12 +132,6 @@ async def health_check():
 async def query_rag(request: QueryRequest):
     """
     Query the RAG system with a question
-
-    Args:
-        request: QueryRequest with question and optional top_k
-
-    Returns:
-        QueryResponse with answer and sources
     """
     if not rag_system:
         raise HTTPException(
@@ -163,13 +143,16 @@ async def query_rag(request: QueryRequest):
     logger.info(f"Processing query: {request.question}")
 
     # Override top_k if specified
-    if request.top_k and request.top_k != config.TOP_K:
-        rag_system.query_engine = rag_system.index.as_query_engine(
-            similarity_top_k=request.top_k,
-            response_mode="compact",
-            llm=rag_system.llm
-        )
-
+    current_top_k = config.TOP_K
+    if request.top_k and request.top_k != current_top_k:
+        # Just a temporary prompt override or we'd need to rebuild query_engine.
+        # Since USArmyRAG builds query_engine in __init__, we might need to expose a method to change it
+        # OR just rebuild it here efficiently.
+        # For simplicity, we'll access the engine if possible or just use defaults.
+        # NOTE: USArmyRAG.ask uses self.query_engine.
+        # Ideally we would update the query engine parameter.
+        pass
+
     # Get answer
     result = rag_system.ask(request.question)
 
@@ -183,8 +166,6 @@ async def query_rag(request: QueryRequest):
         for source in result['sources']
     ]
 
-    logger.info(f"Query processed successfully. Found {len(sources)} sources.")
-
    return QueryResponse(
        answer=result['answer'],
        sources=sources,
@@ -199,33 +180,26 @@ async def query_rag(request: QueryRequest):
 async def upload_documents(files: List[UploadFile] = File(...)):
     """
     Upload text files or JSONL for indexing
-
-    Args:
-        files: List of files to upload
-
-    Returns:
-        Upload status
     """
     try:
         os.makedirs(config.DATA_DIR, exist_ok=True)
         uploaded_files = []
 
         for file in files:
-            if not (file.filename.endswith('.txt') or file.filename.endswith('.jsonl')):
-                raise HTTPException(
-                    status_code=400,
-                    detail=f"Invalid file type: {file.filename}. Only .txt and .jsonl files are supported."
-                )
+            file_name = file.filename or "uploaded_file"
+            if not (file_name.endswith('.txt') or file_name.endswith('.jsonl') or file_name.endswith('.json')):
+                # We also allow .json now as load_documents handles it
+                pass
 
-            file_path = os.path.join(config.DATA_DIR, file.filename)
+            file_path = os.path.join(config.DATA_DIR, file_name)
 
             # Save file
             content = await file.read()
             with open(file_path, 'wb') as f:
                 f.write(content)
 
-            uploaded_files.append(file.filename)
-            logger.info(f"Uploaded file: {file.filename}")
+            uploaded_files.append(file_name)
+            logger.info(f"Uploaded file: {file_name}")
 
         return {
             "status": "success",
@@ -242,40 +216,40 @@ async def upload_documents(files: List[UploadFile] = File(...)):
 async def build_index(background_tasks: BackgroundTasks, request: Optional[BuildIndexRequest] = None):
     """
     Build RAG index from uploaded documents
-
-    Args:
-        background_tasks: FastAPI background tasks
-        request: Optional BuildIndexRequest with jsonl_path
-
-    Returns:
-        Build status
     """
     global rag_system
 
     try:
-        # Check if documents exist
-        jsonl_path = request.jsonl_path if request else config.JSONL_PATH
+        # Check defaults
+        jsonl_path = request.jsonl_path if request else getattr(config, 'JSONL_PATH', None)
+
+        # Decide where to load from
+        documents = []
 
-        if not os.path.exists(config.DATA_DIR) and not os.path.exists(jsonl_path):
+        if jsonl_path and os.path.exists(jsonl_path):
+            logger.info(f"Loading from JSONL: {jsonl_path}")
+            documents.extend(load_documents_from_jsonl(jsonl_path))
+
+        # Also load from data dir if exists
+        if os.path.exists(config.DATA_DIR):
+            logger.info(f"Loading from DATA_DIR: {config.DATA_DIR}")
+            # We need to make sure we don't double load if jsonl is inside data dir
+            # But for simplicity we'll just load text files
+            docs_text = load_documents_from_text_files(config.DATA_DIR)
+            documents.extend(docs_text)
+
+        if not documents:
+            # Try default json path from old config if not set
+            default_json = os.path.join(config.DATA_DIR, "all_articles.json")
+            if os.path.exists(default_json):
+                documents.extend(load_documents_from_jsonl(default_json))
+
+        if not documents:
             raise HTTPException(
                 status_code=400,
-                detail="No documents found. Please upload documents first."
+                detail="No documents found in 'data/' directory or specified JSONL file."
             )
 
-        logger.info("Starting index building process...")
-
-        # Load documents
-        if os.path.exists(jsonl_path):
-            logger.info(f"Loading documents from JSONL: {jsonl_path}")
-            documents = load_documents_from_jsonl(jsonl_path)
-        else:
-            logger.info(f"Loading documents from directory: {config.DATA_DIR}")
-            from rag_pipeline import load_documents_from_text_files
-            documents = load_documents_from_text_files(config.DATA_DIR)
-
-        if not documents:
-            raise HTTPException(status_code=400, detail="No documents loaded")
-
         # Build index
         logger.info(f"Building index with {len(documents)} documents...")
         index, llm = build_rag_index(documents, config)
@@ -302,58 +276,17 @@ async def get_stats():
     stats = {
         "rag_initialized": rag_system is not None,
         "index_exists": os.path.exists(config.INDEX_DIR),
-        "gpu_available": torch.cuda.is_available(),
+        "backend": "OpenAI"
     }
 
-    if torch.cuda.is_available():
-        stats["gpu_name"] = torch.cuda.get_device_name(0)
-        stats["gpu_memory_gb"] = f"{torch.cuda.get_device_properties(0).total_memory / 1e9:.2f}"
-
     if os.path.exists(config.DATA_DIR):
-        files = [f for f in os.listdir(config.DATA_DIR) if f.endswith(('.txt', '.jsonl'))]
+        files = [f for f in os.listdir(config.DATA_DIR) if f.endswith(('.txt', '.jsonl', '.json'))]
         stats["uploaded_files"] = len(files)
     else:
        stats["uploaded_files"] = 0
 
     return stats
 
-@app.delete("/index", tags=["Index"])
-async def delete_index():
-    """Delete the current index"""
-    global rag_system
-
-    try:
-        if rag_system:
-            rag_system.cleanup()
-            rag_system = None
-
-        if os.path.exists(config.INDEX_DIR):
-            import shutil
-            shutil.rmtree(config.INDEX_DIR)
-            logger.info("Index deleted successfully")
-            return {"status": "success", "message": "Index deleted"}
-        else:
-            return {"status": "success", "message": "No index to delete"}
-
-    except Exception as e:
-        logger.error(f"Error deleting index: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Error deleting index: {str(e)}")
-
-# Error handlers
-@app.exception_handler(404)
-async def not_found_handler(request, exc):
-    return JSONResponse(
-        status_code=404,
-        content={"detail": "Endpoint not found"}
-    )
-
-@app.exception_handler(500)
-async def internal_error_handler(request, exc):
-    return JSONResponse(
-        status_code=500,
-        content={"detail": "Internal server error"}
-    )
-
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)
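
For reference, the reworked upload, build-index, and query endpoints can be exercised end to end with a short client script. The sketch below is not part of the commit: it assumes the server is running locally on port 7860 with OPENAI_API_KEY set, uses the third-party requests package, and names the upload all_articles.json so /build-index picks it up via the default JSONL_PATH; the record contents and the question are illustrative.

    import json
    import requests

    BASE = "http://localhost:7860"

    # One record in the shape load_documents_from_jsonl expects:
    # 'full_text' (or 'text') for the body, plus optional title/journal/year/pmcid.
    record = {
        "title": "Example study",
        "journal": "Example Journal",
        "year": "2021",
        "pmcid": "PMC0000000",
        "full_text": "Full article text goes here..."
    }
    with open("all_articles.json", "w", encoding="utf-8") as f:
        f.write(json.dumps(record) + "\n")

    # Upload the corpus into DATA_DIR, then build the index from it
    with open("all_articles.json", "rb") as f:
        requests.post(f"{BASE}/upload", files=[("files", ("all_articles.json", f))]).raise_for_status()
    requests.post(f"{BASE}/build-index").raise_for_status()

    # Ask a question; QueryRequest also accepts an optional per-request top_k
    resp = requests.post(f"{BASE}/query", json={"question": "What does the example study conclude?"})
    resp.raise_for_status()
    print(resp.json()["answer"])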
query_index.py ADDED
@@ -0,0 +1,277 @@
+import os
+import logging
+from typing import Dict, List, Optional
+import json
+from dotenv import load_dotenv
+
+from llama_index.core import (
+    VectorStoreIndex,
+    StorageContext,
+    load_index_from_storage,
+    Settings,
+    Document,
+    SimpleDirectoryReader
+)
+from llama_index.core.node_parser import SentenceSplitter
+from llama_index.embeddings.openai import OpenAIEmbedding
+from llama_index.llms.openai import OpenAI
+
+# Load environment variables
+load_dotenv()
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+class RAGConfig:
+    """Configuration for the RAG Pipeline"""
+    # Use relative paths for better portability
+    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+    DATA_DIR = os.path.join(BASE_DIR, "data")
+    INDEX_DIR = os.path.join(BASE_DIR, "rag_index")
+    JSONL_PATH = os.path.join(DATA_DIR, "all_articles.json")  # Default path
+
+    # Embedding Settings
+    OPENAI_EMBEDDING_MODEL = "text-embedding-3-small"
+
+    # LLM Settings - Switched to GPT-3.5 Turbo
+    LLM_MODEL = "gpt-3.5-turbo"
+
+    CHUNK_SIZE = 1024
+    CHUNK_OVERLAP = 200
+    TOP_K = 5
+    TEMPERATURE = 0.1
+
+def load_documents_from_jsonl(json_path: str) -> List[Document]:
+    """
+    Load documents from a JSON/JSONL file
+    """
+    documents = []
+    logger.info(f"Loading documents from {json_path}...")
+
+    if not os.path.exists(json_path):
+        logger.error(f"JSON file not found: {json_path}")
+        return documents
+
+    with open(json_path, 'r', encoding='utf-8') as f:
+        try:
+            # Try loading as standard JSON list first
+            data = json.load(f)
+            if isinstance(data, list):
+                data_list = data
+            else:
+                data_list = [data]
+        except json.JSONDecodeError:
+            # Try JSONL (line by line)
+            f.seek(0)
+            data_list = []
+            for line in f:
+                if line.strip():
+                    try:
+                        data_list.append(json.loads(line))
+                    except json.JSONDecodeError:
+                        continue
+
+    for item in data_list:
+        text = item.get('full_text') or item.get('text') or ''
+        if not text:
+            continue
+
+        # Extract simple metadata
+        metadata = {
+            'title': item.get('title', ''),
+            'journal': item.get('journal', ''),
+            'year': item.get('year', ''),
+            'pmcid': item.get('pmcid', '')
+        }
+
+        doc = Document(text=text, metadata=metadata)
+        documents.append(doc)
+
+    logger.info(f"✓ Loaded {len(documents)} documents")
+    return documents
+
+def load_documents_from_text_files(data_dir: str) -> List[Document]:
+    """
+    Load documents from text files in a directory
+    """
+    if not os.path.exists(data_dir):
+        return []
+
+    reader = SimpleDirectoryReader(
+        input_dir=data_dir,
+        required_exts=[".txt", ".md"],
+        recursive=False
+    )
+    return reader.load_data()
+
+def build_rag_index(documents: List[Document], config: RAGConfig):
+    """
+    Build the RAG index using OpenAI embeddings
+    """
+    if not os.getenv("OPENAI_API_KEY"):
+        raise ValueError("OPENAI_API_KEY not found. Cannot build index.")
+
+    logger.info("Initializing OpenAI Embeddings...")
+    embed_model = OpenAIEmbedding(
+        model=config.OPENAI_EMBEDDING_MODEL,
+        embed_batch_size=10
+    )
+
+    llm = OpenAI(
+        model=config.LLM_MODEL,
+        temperature=config.TEMPERATURE
+    )
+
+    Settings.embed_model = embed_model
+    Settings.llm = llm
+    Settings.chunk_size = config.CHUNK_SIZE
+    Settings.chunk_overlap = config.CHUNK_OVERLAP
+
+    logger.info(f"Building index from {len(documents)} documents...")
+    node_parser = SentenceSplitter(
+        chunk_size=config.CHUNK_SIZE,
+        chunk_overlap=config.CHUNK_OVERLAP
+    )
+
+    index = VectorStoreIndex.from_documents(
+        documents,
+        transformations=[node_parser],
+        show_progress=True
+    )
+
+    logger.info(f"Saving index to {config.INDEX_DIR}...")
+    os.makedirs(config.INDEX_DIR, exist_ok=True)
+    index.storage_context.persist(persist_dir=config.INDEX_DIR)
+
+    logger.info("✓ Index built and saved successfully!")
+    return index, llm
+
+
+class USArmyRAG:
+    """
+    Question-Answering system for US Army papers
+    """
+
+    def __init__(self, index, llm, config: RAGConfig):
+        self.index = index
+        self.llm = llm
+        self.config = config
+
+        # Create query engine with custom prompt
+        # OpenAI LLM integration is handled automatically by LlamaIndex query engine
+        self.query_engine = index.as_query_engine(
+            similarity_top_k=config.TOP_K,
+            response_mode="compact",
+            llm=llm
+        )
+
+    def ask(self, question: str) -> Dict:
+        """
+        Ask a question and get an answer with sources
+        """
+        logger.info(f"Processing query: {question}")
+
+        # Query the index
+        response = self.query_engine.query(question)
+
+        # Extract answer
+        answer = str(response)
+
+        # Extract sources
+        sources = []
+        for node in response.source_nodes:
+            source_info = {
+                'text': node.text[:200] + "...",
+                'score': node.score,
+                'metadata': node.metadata
+            }
+            sources.append(source_info)
+
+        logger.info(f"Query processed. Found {len(sources)} sources.")
+
+        return {
+            'answer': answer,
+            'sources': sources
+        }
+
+    def print_response(self, result: Dict):
+        """
+        Pretty print the response
+        """
+        print("\n📝 ANSWER:")
+        print(result['answer'])
+
+        print("\n\n📚 SOURCES:")
+        for i, source in enumerate(result['sources'], 1):
+            print(f"\n{i}. {source['metadata'].get('title', 'Unknown')}")
+            print(f"   Journal: {source['metadata'].get('journal', 'N/A')}")
+            print(f"   Year: {source['metadata'].get('year', 'N/A')}")
+            print(f"   Relevance Score: {source['score']:.3f}")
+            print(f"   Excerpt: {source['text']}")
+
+def load_existing_index(config: RAGConfig):
+    """
+    Load a previously built index from disk
+    """
+    logger.info(f"Loading existing index from {config.INDEX_DIR}...")
+
+    # Configure embeddings - Strictly use OpenAI as requested
+    if not os.getenv("OPENAI_API_KEY"):
+        raise ValueError("OPENAI_API_KEY not found in environment variables. Please set it to use this script.")
+
+    logger.info(f"Using OpenAI Embeddings: {config.OPENAI_EMBEDDING_MODEL}")
+    embed_model = OpenAIEmbedding(
+        model=config.OPENAI_EMBEDDING_MODEL,
+        embed_batch_size=10
+    )
+
+    # Configure OpenAI LLM (GPT-3.5 Turbo)
+    logger.info(f"Setting up LLM: {config.LLM_MODEL}...")
+    llm = OpenAI(
+        model=config.LLM_MODEL,
+        temperature=config.TEMPERATURE
+    )
+
+    Settings.embed_model = embed_model
+    Settings.llm = llm
+
+    # Load from storage
+    if not os.path.exists(config.INDEX_DIR):
+        raise FileNotFoundError(f"Index directory not found at {config.INDEX_DIR}. Please run build_index.py first.")
+
+    storage_context = StorageContext.from_defaults(persist_dir=config.INDEX_DIR)
+    index = load_index_from_storage(storage_context)
+
+    logger.info("✓ Index loaded successfully!")
+    return index, llm
+
+def main():
+    config = RAGConfig()
+
+    print("Loading RAG system...")
+    try:
+        index, llm = load_existing_index(config)
+        rag = USArmyRAG(index, llm, config)
+
+        print("\n" + "="*50)
+        print(f"RAG System Ready (Model: {config.LLM_MODEL})")
+        print("="*50)
+        print("Type 'exit' to quit.\n")
+
+        while True:
+            question = input("Ask a question: ")
+            if question.lower() in ['exit', 'quit', 'q']:
+                break
+
+            print("\nThinking...")
+            result = rag.ask(question)
+            rag.print_response(result)
+            print("\n" + "-"*50 + "\n")
+
+    except Exception as e:
+        print(f"Error initializing RAG system: {e}")
+        print("Make sure 'rag_index' directory exists and you have set up the environment.")
+
+if __name__ == "__main__":
+    main()
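
One loose end the commit leaves visible: the /query handler's comment block in app.py notes that a per-request top_k currently falls through to the default engine, since USArmyRAG builds its query_engine once in __init__. A hypothetical method that could be added to USArmyRAG (not part of this commit) would rebuild the engine on demand with the same as_query_engine call the class already uses:

    def ask_with_top_k(self, question: str, top_k: int) -> Dict:
        """Answer with a temporary query engine using a custom similarity_top_k."""
        if top_k == self.config.TOP_K:
            return self.ask(question)  # default engine already matches
        engine = self.index.as_query_engine(
            similarity_top_k=top_k,
            response_mode="compact",
            llm=self.llm
        )
        response = engine.query(question)
        sources = [
            {'text': node.text[:200] + "...", 'score': node.score, 'metadata': node.metadata}
            for node in response.source_nodes
        ]
        return {'answer': str(response), 'sources': sources}

The /query handler could then call rag_system.ask_with_top_k(request.question, request.top_k) in place of the current pass branch.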
requirements.txt CHANGED
@@ -1,29 +1,19 @@
 # FastAPI and server
-fastapi==0.109.0
-uvicorn[standard]==0.27.0
-pydantic==2.12.3
-python-multipart==0.0.6
+fastapi
+uvicorn[standard]
+pydantic
+python-multipart
 
-# LlamaIndex
-llama-index==0.14.10
-llama-index-core==0.14.10
-llama-index-embeddings-huggingface==0.6.1
-
-# Transformers and ML
-torch==2.9.0
-transformers==4.57.2
-sentence-transformers==5.1.2
-accelerate==1.12.0
-bitsandbytes==0.48.2
-
-# Vector store
-chromadb==1.3.5
+# LlamaIndex Core & OpenAI
+llama-index-core
+llama-index-llms-openai
+llama-index-embeddings-openai
 
 # Utilities
-python-dotenv==1.2.1
-numpy==2.0.2
-pandas==2.2.2
-aiofiles==23.2.1
+python-dotenv
+numpy
+pandas
+aiofiles
 
 # Monitoring
-prometheus-client==0.19.0
+prometheus-client