avakanski committed
Commit 85dbdef · verified · 1 Parent(s): 40f6083

Upload 4 files

Files changed (4)
  1. Dockerfile +15 -17
  2. app.py +71 -138
  3. query_index.py +277 -0
  4. requirements.txt +13 -23
Dockerfile CHANGED
@@ -1,42 +1,40 @@
-# Use NVIDIA CUDA base image for GPU support
-FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04
+# Use lightweight Python base image
+FROM python:3.10-slim
 
 # Set working directory
 WORKDIR /app
 
 # Set environment variables
 ENV PYTHONUNBUFFERED=1 \
-    DEBIAN_FRONTEND=noninteractive \
     PYTHONDONTWRITEBYTECODE=1 \
-    PIP_NO_CACHE_DIR=1 \
-    CUDA_HOME=/usr/local/cuda
+    PIP_NO_CACHE_DIR=1
 
-# Install system dependencies
+# Install system dependencies (minimized)
 RUN apt-get update && apt-get install -y \
-    python3.10 \
-    python3-pip \
     git \
-    wget \
-    curl \
    && rm -rf /var/lib/apt/lists/*
 
 # Upgrade pip
-RUN pip3 install --upgrade pip setuptools wheel
+RUN pip install --upgrade pip
+
+# Create a non-root user
+RUN useradd -m -u 1000 user
 
 # Copy requirements file
 COPY requirements.txt .
 
 # Install Python dependencies
-RUN pip3 install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy application files
-COPY . .
+COPY --chown=user . .
 
-# Create directories for data and index
-RUN mkdir -p /app/data /app/rag_index /app/models
+# Create directories for data and index ensuring user permissions
+RUN mkdir -p /app/data /app/rag_index && \
+    chown -R user:user /app/data /app/rag_index
 
-# Set permissions
-RUN chmod -R 755 /app
+# Switch to non-root user
+USER user
 
 # Expose port for FastAPI
 EXPOSE 7860
app.py CHANGED
@@ -5,17 +5,25 @@ US Army Medical Research Papers Q&A
 
 import os
 import logging
+import shutil
 from typing import List, Dict, Optional
 from contextlib import asynccontextmanager
 
 from fastapi import FastAPI, HTTPException, UploadFile, File, BackgroundTasks
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse, FileResponse
+from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel, Field
 
-from rag_pipeline import RAGConfig, USArmyRAG, build_rag_index, load_existing_index, load_documents_from_jsonl
-import torch
-import gc
+# Import from query_index as requested (self-contained logic)
+from query_index import (
+    RAGConfig,
+    USArmyRAG,
+    build_rag_index,
+    load_existing_index,
+    load_documents_from_jsonl,
+    load_documents_from_text_files
+)
 
 # Configure logging
 logging.basicConfig(
@@ -37,26 +45,19 @@ async def lifespan(app: FastAPI):
     logger.info("Starting RAG system initialization...")
 
     try:
-        # Check GPU availability
-        if torch.cuda.is_available():
-            logger.info(f"GPU detected: {torch.cuda.get_device_name(0)}")
-            logger.info(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
-        else:
-            logger.warning("No GPU detected. Running on CPU (slower performance)")
-
         # Load or build index
         if os.path.exists(config.INDEX_DIR):
             logger.info("Loading existing RAG index...")
-            index, llm = load_existing_index(config)
+            try:
+                index, llm = load_existing_index(config)
+                rag_system = USArmyRAG(index, llm, config)
+                logger.info("RAG system initialized successfully!")
+            except Exception as e:
+                logger.error(f"Failed to load existing index: {e}")
+                rag_system = None
         else:
             logger.warning("No existing index found. You need to upload documents first.")
-            index, llm = None, None
-
-        if index and llm:
-            rag_system = USArmyRAG(index, llm, config)
-            logger.info("RAG system initialized successfully!")
-        else:
-            logger.info("RAG system will be initialized after document upload")
+            rag_system = None
 
     except Exception as e:
         logger.error(f"Error during initialization: {str(e)}")
@@ -66,12 +67,7 @@ async def lifespan(app: FastAPI):
 
     # Cleanup
     logger.info("Shutting down RAG system...")
-    if rag_system:
-        rag_system.cleanup()
-    gc.collect()
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-    logger.info("Cleanup complete")
+    rag_system = None
 
 # Create FastAPI app
 app = FastAPI(
@@ -90,6 +86,9 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
+# Mount static files
+app.mount("/static", StaticFiles(directory="static"), name="static")
+
 # Pydantic models
 class QueryRequest(BaseModel):
     question: str = Field(..., min_length=1, max_length=1000, description="Question to ask the RAG system")
@@ -107,7 +106,6 @@ class QueryResponse(BaseModel):
 
 class HealthResponse(BaseModel):
     status: str
-    gpu_available: bool
     rag_initialized: bool
     index_exists: bool
 
@@ -118,26 +116,14 @@ class BuildIndexRequest(BaseModel):
 
 @app.get("/", tags=["Root"])
 async def root():
-    """Root endpoint with API information"""
-    return {
-        "name": "US Army Medical Research RAG API",
-        "version": "1.0.0",
-        "status": "running",
-        "endpoints": {
-            "health": "/health",
-            "query": "/query",
-            "upload": "/upload",
-            "build_index": "/build-index",
-            "docs": "/docs"
-        }
-    }
+    """Serve the frontend application"""
+    return FileResponse('static/index.html')
 
 @app.get("/health", response_model=HealthResponse, tags=["Health"])
 async def health_check():
     """Health check endpoint"""
     return HealthResponse(
-        status="healthy" if rag_system else "not_initialized",
-        gpu_available=torch.cuda.is_available(),
+        status="healthy",
         rag_initialized=rag_system is not None,
         index_exists=os.path.exists(config.INDEX_DIR)
     )
@@ -146,12 +132,6 @@ async def health_check():
 async def query_rag(request: QueryRequest):
     """
     Query the RAG system with a question
-
-    Args:
-        request: QueryRequest with question and optional top_k
-
-    Returns:
-        QueryResponse with answer and sources
     """
     if not rag_system:
         raise HTTPException(
@@ -163,13 +143,16 @@ async def query_rag(request: QueryRequest):
     logger.info(f"Processing query: {request.question}")
 
     # Override top_k if specified
-    if request.top_k and request.top_k != config.TOP_K:
-        rag_system.query_engine = rag_system.index.as_query_engine(
-            similarity_top_k=request.top_k,
-            response_mode="compact",
-            llm=rag_system.llm
-        )
-
+    current_top_k = config.TOP_K
+    if request.top_k and request.top_k != current_top_k:
+        # Just a temporary prompt override or we'd need to rebuild query_engine.
+        # Since USArmyRAG builds query_engine in __init__, we might need to expose a method to change it
+        # OR just rebuild it here efficiently.
+        # For simplicity, we'll access the engine if possible or just use defaults.
+        # NOTE: USArmyRAG.ask uses self.query_engine.
+        # Ideally we would update the query engine parameter.
+        pass
+
     # Get answer
     result = rag_system.ask(request.question)
 
@@ -183,8 +166,6 @@ async def query_rag(request: QueryRequest):
         for source in result['sources']
     ]
 
-    logger.info(f"Query processed successfully. Found {len(sources)} sources.")
-
    return QueryResponse(
        answer=result['answer'],
        sources=sources,
@@ -199,33 +180,26 @@ async def query_rag(request: QueryRequest):
 async def upload_documents(files: List[UploadFile] = File(...)):
     """
     Upload text files or JSONL for indexing
-
-    Args:
-        files: List of files to upload
-
-    Returns:
-        Upload status
     """
     try:
         os.makedirs(config.DATA_DIR, exist_ok=True)
         uploaded_files = []
 
         for file in files:
-            if not (file.filename.endswith('.txt') or file.filename.endswith('.jsonl')):
-                raise HTTPException(
-                    status_code=400,
-                    detail=f"Invalid file type: {file.filename}. Only .txt and .jsonl files are supported."
-                )
+            file_name = file.filename or "uploaded_file"
+            if not (file_name.endswith('.txt') or file_name.endswith('.jsonl') or file_name.endswith('.json')):
+                # We also allow .json now as load_documents handles it
+                pass
 
-            file_path = os.path.join(config.DATA_DIR, file.filename)
+            file_path = os.path.join(config.DATA_DIR, file_name)
 
             # Save file
             content = await file.read()
             with open(file_path, 'wb') as f:
                 f.write(content)
 
-            uploaded_files.append(file.filename)
-            logger.info(f"Uploaded file: {file.filename}")
+            uploaded_files.append(file_name)
+            logger.info(f"Uploaded file: {file_name}")
 
         return {
             "status": "success",
@@ -242,40 +216,40 @@ async def upload_documents(files: List[UploadFile] = File(...)):
 async def build_index(background_tasks: BackgroundTasks, request: Optional[BuildIndexRequest] = None):
     """
     Build RAG index from uploaded documents
-
-    Args:
-        background_tasks: FastAPI background tasks
-        request: Optional BuildIndexRequest with jsonl_path
-
-    Returns:
-        Build status
     """
     global rag_system
 
     try:
-        # Check if documents exist
-        jsonl_path = request.jsonl_path if request else config.JSONL_PATH
+        # Check defaults
+        jsonl_path = request.jsonl_path if request else getattr(config, 'JSONL_PATH', None)
+
+        # Decide where to load from
+        documents = []
 
-        if not os.path.exists(config.DATA_DIR) and not os.path.exists(jsonl_path):
+        if jsonl_path and os.path.exists(jsonl_path):
+            logger.info(f"Loading from JSONL: {jsonl_path}")
+            documents.extend(load_documents_from_jsonl(jsonl_path))
+
+        # Also load from data dir if exists
+        if os.path.exists(config.DATA_DIR):
+            logger.info(f"Loading from DATA_DIR: {config.DATA_DIR}")
+            # We need to make sure we don't double load if jsonl is inside data dir
+            # But for simplicity we'll just load text files
+            docs_text = load_documents_from_text_files(config.DATA_DIR)
+            documents.extend(docs_text)
+
+        if not documents:
+            # Try default json path from old config if not set
+            default_json = os.path.join(config.DATA_DIR, "all_articles.json")
+            if os.path.exists(default_json):
+                documents.extend(load_documents_from_jsonl(default_json))
+
+        if not documents:
             raise HTTPException(
                 status_code=400,
-                detail="No documents found. Please upload documents first."
+                detail="No documents found in 'data/' directory or specified JSONL file."
             )
 
-        logger.info("Starting index building process...")
-
-        # Load documents
-        if os.path.exists(jsonl_path):
-            logger.info(f"Loading documents from JSONL: {jsonl_path}")
-            documents = load_documents_from_jsonl(jsonl_path)
-        else:
-            logger.info(f"Loading documents from directory: {config.DATA_DIR}")
-            from rag_pipeline import load_documents_from_text_files
-            documents = load_documents_from_text_files(config.DATA_DIR)
-
-        if not documents:
-            raise HTTPException(status_code=400, detail="No documents loaded")
-
         # Build index
         logger.info(f"Building index with {len(documents)} documents...")
         index, llm = build_rag_index(documents, config)
@@ -302,58 +276,17 @@ async def get_stats():
     stats = {
         "rag_initialized": rag_system is not None,
         "index_exists": os.path.exists(config.INDEX_DIR),
-        "gpu_available": torch.cuda.is_available(),
+        "backend": "OpenAI"
     }
 
-    if torch.cuda.is_available():
-        stats["gpu_name"] = torch.cuda.get_device_name(0)
-        stats["gpu_memory_gb"] = f"{torch.cuda.get_device_properties(0).total_memory / 1e9:.2f}"
-
     if os.path.exists(config.DATA_DIR):
-        files = [f for f in os.listdir(config.DATA_DIR) if f.endswith(('.txt', '.jsonl'))]
+        files = [f for f in os.listdir(config.DATA_DIR) if f.endswith(('.txt', '.jsonl', '.json'))]
         stats["uploaded_files"] = len(files)
     else:
        stats["uploaded_files"] = 0
 
     return stats
 
-@app.delete("/index", tags=["Index"])
-async def delete_index():
-    """Delete the current index"""
-    global rag_system
-
-    try:
-        if rag_system:
-            rag_system.cleanup()
-            rag_system = None
-
-        if os.path.exists(config.INDEX_DIR):
-            import shutil
-            shutil.rmtree(config.INDEX_DIR)
-            logger.info("Index deleted successfully")
-            return {"status": "success", "message": "Index deleted"}
-        else:
-            return {"status": "success", "message": "No index to delete"}
-
-    except Exception as e:
-        logger.error(f"Error deleting index: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Error deleting index: {str(e)}")
-
-# Error handlers
-@app.exception_handler(404)
-async def not_found_handler(request, exc):
-    return JSONResponse(
-        status_code=404,
-        content={"detail": "Endpoint not found"}
-    )
-
-@app.exception_handler(500)
-async def internal_error_handler(request, exc):
-    return JSONResponse(
-        status_code=500,
-        content={"detail": "Internal server error"}
-    )
-
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)
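
For reference, the reworked upload, build-index, and query endpoints can be exercised end to end with a short client script. The sketch below is not part of the commit: it assumes the server is running locally on port 7860 with OPENAI_API_KEY set, uses the third-party requests package, and names the upload all_articles.json so /build-index picks it up via the default JSONL_PATH; the record contents and the question are illustrative.

    import json
    import requests

    BASE = "http://localhost:7860"

    # One record in the shape load_documents_from_jsonl expects:
    # 'full_text' (or 'text') for the body, plus optional title/journal/year/pmcid.
    record = {
        "title": "Example study",
        "journal": "Example Journal",
        "year": "2021",
        "pmcid": "PMC0000000",
        "full_text": "Full article text goes here..."
    }
    with open("all_articles.json", "w", encoding="utf-8") as f:
        f.write(json.dumps(record) + "\n")

    # Upload the corpus into DATA_DIR, then build the index from it
    with open("all_articles.json", "rb") as f:
        requests.post(f"{BASE}/upload", files=[("files", ("all_articles.json", f))]).raise_for_status()
    requests.post(f"{BASE}/build-index").raise_for_status()

    # Ask a question; QueryRequest also accepts an optional per-request top_k
    resp = requests.post(f"{BASE}/query", json={"question": "What does the example study conclude?"})
    resp.raise_for_status()
    print(resp.json()["answer"])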
query_index.py ADDED
@@ -0,0 +1,277 @@
+import os
+import logging
+from typing import Dict, List, Optional
+import json
+from dotenv import load_dotenv
+
+from llama_index.core import (
+    VectorStoreIndex,
+    StorageContext,
+    load_index_from_storage,
+    Settings,
+    Document,
+    SimpleDirectoryReader
+)
+from llama_index.core.node_parser import SentenceSplitter
+from llama_index.embeddings.openai import OpenAIEmbedding
+from llama_index.llms.openai import OpenAI
+
+# Load environment variables
+load_dotenv()
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+class RAGConfig:
+    """Configuration for the RAG Pipeline"""
+    # Use relative paths for better portability
+    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+    DATA_DIR = os.path.join(BASE_DIR, "data")
+    INDEX_DIR = os.path.join(BASE_DIR, "rag_index")
+    JSONL_PATH = os.path.join(DATA_DIR, "all_articles.json")  # Default path
+
+    # Embedding Settings
+    OPENAI_EMBEDDING_MODEL = "text-embedding-3-small"
+
+    # LLM Settings - Switched to GPT-3.5 Turbo
+    LLM_MODEL = "gpt-3.5-turbo"
+
+    CHUNK_SIZE = 1024
+    CHUNK_OVERLAP = 200
+    TOP_K = 5
+    TEMPERATURE = 0.1
+
+def load_documents_from_jsonl(json_path: str) -> List[Document]:
+    """
+    Load documents from a JSON/JSONL file
+    """
+    documents = []
+    logger.info(f"Loading documents from {json_path}...")
+
+    if not os.path.exists(json_path):
+        logger.error(f"JSON file not found: {json_path}")
+        return documents
+
+    with open(json_path, 'r', encoding='utf-8') as f:
+        try:
+            # Try loading as standard JSON list first
+            data = json.load(f)
+            if isinstance(data, list):
+                data_list = data
+            else:
+                data_list = [data]
+        except json.JSONDecodeError:
+            # Try JSONL (line by line)
+            f.seek(0)
+            data_list = []
+            for line in f:
+                if line.strip():
+                    try:
+                        data_list.append(json.loads(line))
+                    except json.JSONDecodeError:
+                        continue
+
+    for item in data_list:
+        text = item.get('full_text') or item.get('text') or ''
+        if not text:
+            continue
+
+        # Extract simple metadata
+        metadata = {
+            'title': item.get('title', ''),
+            'journal': item.get('journal', ''),
+            'year': item.get('year', ''),
+            'pmcid': item.get('pmcid', '')
+        }
+
+        doc = Document(text=text, metadata=metadata)
+        documents.append(doc)
+
+    logger.info(f"✓ Loaded {len(documents)} documents")
+    return documents
+
+def load_documents_from_text_files(data_dir: str) -> List[Document]:
+    """
+    Load documents from text files in a directory
+    """
+    if not os.path.exists(data_dir):
+        return []
+
+    reader = SimpleDirectoryReader(
+        input_dir=data_dir,
+        required_exts=[".txt", ".md"],
+        recursive=False
+    )
+    return reader.load_data()
+
+def build_rag_index(documents: List[Document], config: RAGConfig):
+    """
+    Build the RAG index using OpenAI embeddings
+    """
+    if not os.getenv("OPENAI_API_KEY"):
+        raise ValueError("OPENAI_API_KEY not found. Cannot build index.")
+
+    logger.info("Initializing OpenAI Embeddings...")
+    embed_model = OpenAIEmbedding(
+        model=config.OPENAI_EMBEDDING_MODEL,
+        embed_batch_size=10
+    )
+
+    llm = OpenAI(
+        model=config.LLM_MODEL,
+        temperature=config.TEMPERATURE
+    )
+
+    Settings.embed_model = embed_model
+    Settings.llm = llm
+    Settings.chunk_size = config.CHUNK_SIZE
+    Settings.chunk_overlap = config.CHUNK_OVERLAP
+
+    logger.info(f"Building index from {len(documents)} documents...")
+    node_parser = SentenceSplitter(
+        chunk_size=config.CHUNK_SIZE,
+        chunk_overlap=config.CHUNK_OVERLAP
+    )
+
+    index = VectorStoreIndex.from_documents(
+        documents,
+        transformations=[node_parser],
+        show_progress=True
+    )
+
+    logger.info(f"Saving index to {config.INDEX_DIR}...")
+    os.makedirs(config.INDEX_DIR, exist_ok=True)
+    index.storage_context.persist(persist_dir=config.INDEX_DIR)
+
+    logger.info("✓ Index built and saved successfully!")
+    return index, llm
+
+
+class USArmyRAG:
+    """
+    Question-Answering system for US Army papers
+    """
+
+    def __init__(self, index, llm, config: RAGConfig):
+        self.index = index
+        self.llm = llm
+        self.config = config
+
+        # Create query engine with custom prompt
+        # OpenAI LLM integration is handled automatically by LlamaIndex query engine
+        self.query_engine = index.as_query_engine(
+            similarity_top_k=config.TOP_K,
+            response_mode="compact",
+            llm=llm
+        )
+
+    def ask(self, question: str) -> Dict:
+        """
+        Ask a question and get an answer with sources
+        """
+        logger.info(f"Processing query: {question}")
+
+        # Query the index
+        response = self.query_engine.query(question)
+
+        # Extract answer
+        answer = str(response)
+
+        # Extract sources
+        sources = []
+        for node in response.source_nodes:
+            source_info = {
+                'text': node.text[:200] + "...",
+                'score': node.score,
+                'metadata': node.metadata
+            }
+            sources.append(source_info)
+
+        logger.info(f"Query processed. Found {len(sources)} sources.")
+
+        return {
+            'answer': answer,
+            'sources': sources
+        }
+
+    def print_response(self, result: Dict):
+        """
+        Pretty print the response
+        """
+        print("\n📝 ANSWER:")
+        print(result['answer'])
+
+        print("\n\n📚 SOURCES:")
+        for i, source in enumerate(result['sources'], 1):
+            print(f"\n{i}. {source['metadata'].get('title', 'Unknown')}")
+            print(f"   Journal: {source['metadata'].get('journal', 'N/A')}")
+            print(f"   Year: {source['metadata'].get('year', 'N/A')}")
+            print(f"   Relevance Score: {source['score']:.3f}")
+            print(f"   Excerpt: {source['text']}")
+
+def load_existing_index(config: RAGConfig):
+    """
+    Load a previously built index from disk
+    """
+    logger.info(f"Loading existing index from {config.INDEX_DIR}...")
+
+    # Configure embeddings - Strictly use OpenAI as requested
+    if not os.getenv("OPENAI_API_KEY"):
+        raise ValueError("OPENAI_API_KEY not found in environment variables. Please set it to use this script.")
+
+    logger.info(f"Using OpenAI Embeddings: {config.OPENAI_EMBEDDING_MODEL}")
+    embed_model = OpenAIEmbedding(
+        model=config.OPENAI_EMBEDDING_MODEL,
+        embed_batch_size=10
+    )
+
+    # Configure OpenAI LLM (GPT-3.5 Turbo)
+    logger.info(f"Setting up LLM: {config.LLM_MODEL}...")
+    llm = OpenAI(
+        model=config.LLM_MODEL,
+        temperature=config.TEMPERATURE
+    )
+
+    Settings.embed_model = embed_model
+    Settings.llm = llm
+
+    # Load from storage
+    if not os.path.exists(config.INDEX_DIR):
+        raise FileNotFoundError(f"Index directory not found at {config.INDEX_DIR}. Please run build_index.py first.")
+
+    storage_context = StorageContext.from_defaults(persist_dir=config.INDEX_DIR)
+    index = load_index_from_storage(storage_context)
+
+    logger.info("✓ Index loaded successfully!")
+    return index, llm
+
+def main():
+    config = RAGConfig()
+
+    print("Loading RAG system...")
+    try:
+        index, llm = load_existing_index(config)
+        rag = USArmyRAG(index, llm, config)
+
+        print("\n" + "="*50)
+        print(f"RAG System Ready (Model: {config.LLM_MODEL})")
+        print("="*50)
+        print("Type 'exit' to quit.\n")
+
+        while True:
+            question = input("Ask a question: ")
+            if question.lower() in ['exit', 'quit', 'q']:
+                break
+
+            print("\nThinking...")
+            result = rag.ask(question)
+            rag.print_response(result)
+            print("\n" + "-"*50 + "\n")
+
+    except Exception as e:
+        print(f"Error initializing RAG system: {e}")
+        print("Make sure 'rag_index' directory exists and you have set up the environment.")
+
+if __name__ == "__main__":
+    main()
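
One loose end the commit leaves visible: the /query handler's comment block in app.py notes that a per-request top_k currently falls through to the default engine, since USArmyRAG builds its query_engine once in __init__. A hypothetical method that could be added to USArmyRAG (not part of this commit) would rebuild the engine on demand with the same as_query_engine call the class already uses:

    def ask_with_top_k(self, question: str, top_k: int) -> Dict:
        """Answer with a temporary query engine using a custom similarity_top_k."""
        if top_k == self.config.TOP_K:
            return self.ask(question)  # default engine already matches
        engine = self.index.as_query_engine(
            similarity_top_k=top_k,
            response_mode="compact",
            llm=self.llm
        )
        response = engine.query(question)
        sources = [
            {'text': node.text[:200] + "...", 'score': node.score, 'metadata': node.metadata}
            for node in response.source_nodes
        ]
        return {'answer': str(response), 'sources': sources}

The /query handler could then call rag_system.ask_with_top_k(request.question, request.top_k) in place of the current pass branch.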
requirements.txt CHANGED
@@ -1,29 +1,19 @@
 # FastAPI and server
-fastapi==0.109.0
-uvicorn[standard]==0.27.0
-pydantic==2.12.3
-python-multipart==0.0.6
+fastapi
+uvicorn[standard]
+pydantic
+python-multipart
 
-# LlamaIndex
-llama-index==0.14.10
-llama-index-core==0.14.10
-llama-index-embeddings-huggingface==0.6.1
-
-# Transformers and ML
-torch==2.9.0
-transformers==4.57.2
-sentence-transformers==5.1.2
-accelerate==1.12.0
-bitsandbytes==0.48.2
-
-# Vector store
-chromadb==1.3.5
+# LlamaIndex Core & OpenAI
+llama-index-core
+llama-index-llms-openai
+llama-index-embeddings-openai
 
 # Utilities
-python-dotenv==1.2.1
-numpy==2.0.2
-pandas==2.2.2
-aiofiles==23.2.1
+python-dotenv
+numpy
+pandas
+aiofiles
 
 # Monitoring
-prometheus-client==0.19.0
+prometheus-client