Spaces:

VimalrajS04
/

Assignment_drac

Sleeping

App Files Files Community

VimalrajS04 committed on Nov 1, 2025

Commit

51277f6

1 Parent(s): 37f0716

upgrade it to flask

Browse files

Files changed (4) hide show

Dockerfile +15 -14
app.py +250 -432
git +0 -0
requirements.txt +1 -0

Dockerfile CHANGED Viewed

@@ -1,32 +1,33 @@
-# Use a standard Python 3.10 image
 FROM python:3.10-slim
-# Install system dependencies for OpenCV (which doctr needs)
-RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 libsm6 libxext6 && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-# Set the working directory inside the container
 WORKDIR /code
-# Set writable cache directories for ALL model libraries
 ENV DOCTR_CACHE_DIR="/code/.cache/doctr"
 ENV HF_HOME="/code/.cache/huggingface"
-# Pre-create all cache directories and make them writable
 RUN mkdir -p /code/.cache/doctr /code/.cache/huggingface && \
     chmod 777 -R /code/.cache
-# Copy the requirements file and install packages
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-# Copy all your project files (app.py, data folder, etc.)
 COPY . .
-# Expose the port your app will run on
 EXPOSE 7860
-# The command to run your app
-# This is the same command you used locally!
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

 FROM python:3.10-slim
+# Install system dependencies for OpenCV
+RUN apt-get update && apt-get install -y \
+    libgl1 \
+    libglib2.0-0 \
+    libsm6 \
+    libxext6 \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
 WORKDIR /code
+# Set cache directories
 ENV DOCTR_CACHE_DIR="/code/.cache/doctr"
 ENV HF_HOME="/code/.cache/huggingface"
+# Create and set permissions for cache directories
 RUN mkdir -p /code/.cache/doctr /code/.cache/huggingface && \
     chmod 777 -R /code/.cache
+# Copy and install requirements
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
+# Copy application files
 COPY . .
+# Expose port
 EXPOSE 7860
+# Run the Flask+Gradio app
+CMD ["python", "app.py"]

app.py CHANGED Viewed

@@ -9,91 +9,56 @@ import os
 from groq import Groq
 import base64
 from io import BytesIO
-import fitz
-from pathlib import Path
 import time
 import shutil
-import tempfile
-# -------------------------------
-# 🚀 NEW IMPORTS FOR FASTAPI
-# -------------------------------
-from fastapi import FastAPI, UploadFile, File, HTTPException
-from fastapi.responses import RedirectResponse
-from pydantic import BaseModel
-from typing import List, Dict, Any
 from langchain_qdrant import Qdrant
 from qdrant_client import QdrantClient
 # -------------------------------
-# 1️⃣ Load OCR + Embedding Models + Groq Client
 # -------------------------------
 device = "cuda" if torch.cuda.is_available() else "cpu"
-try:
-    print(f"Loading OCR model to {device}...")
-    ocr_model = ocr_predictor(pretrained=True).to(device)
-    print("✅ OCR model loaded.")
-except Exception as e:
-    print(f"❌ Failed to load OCR model: {e}")
-    ocr_model = None
-print("Loading Embedding model...")
 embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-print("✅ Embedding model loaded.")
-# Initialize Groq client
-if not os.environ.get("GROQ_API_KEY"):
-    print("⚠️  WARNING: GROQ_API_KEY environment variable not set.")
-    groq_client = None
-else:
-    groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
-# Model configurations
 VISION_MODEL = "meta-llama/llama-4-scout-17b-16e-instruct"
 LLM_MODEL = "llama-3.3-70b-versatile"
-# -------------------------------
-# 1b ☁️ NEW: Qdrant Cloud Configuration
-# -------------------------------
-QDRANT_URL = os.environ.get("QDRANT_URL", "https://bdf142ef-7e2a-433b-87a0-301ff303e3af.us-east4-0.gcp.cloud.qdrant.io:6333")
 QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
 COLLECTION_NAME = "multimodal_rag_store"
-# --- Helper Functions (2 to 7) ---
 # -------------------------------
-# 2️⃣ Helper: Check if image has substantial text
 # -------------------------------
 def has_substantial_text(text, min_words=10):
-    """
-    Determines if OCR extracted enough text to consider it a text-based image.
-    """
     words = text.split()
     return len(words) >= min_words
-# -------------------------------
-# 3️⃣ Vision Analysis using Groq Llama 4 Scout
-# -------------------------------
 def analyze_image_with_vision(img_path=None, img_bytes=None, pil_image=None, max_retries=3):
-    if not groq_client:
-        return ""
     for attempt in range(max_retries):
         try:
-            img_data = None
-            img_format = "png"
             if pil_image:
                 buffered = BytesIO()
                 pil_image.save(buffered, format="PNG")
                 img_data = buffered.getvalue()
             elif img_path:
                 with open(img_path, "rb") as img_file:
                     img_data = img_file.read()
                 img_format = img_path.lower().split('.')[-1]
             elif img_bytes:
                 img_data = img_bytes
             else:
                 return ""
@@ -104,19 +69,19 @@ def analyze_image_with_vision(img_path=None, img_bytes=None, pil_image=None, max
             vision_prompt = """Analyze this image carefully and provide a detailed description:
 1. IDENTIFY THE TYPE: Is this a chart, graph, table, diagram, photograph, or text document?
 2. IF IT'S A CHART/GRAPH/TABLE:
-    - Specify the exact type
-    - List ALL categories/labels shown
-    - Describe the data values and trends
-    - Mention axis labels, title, legend if present
-    - Highlight key insights or patterns
 3. IF IT'S A PHOTOGRAPH/DIAGRAM:
-    - Describe what you see in detail
-    - Identify key objects, people, or concepts
-    - Note any text visible in the image
 4. IF IT'S A TEXT DOCUMENT:
-    - Summarize the main content and structure
 Provide a comprehensive description suitable for semantic search. Be specific and detailed."""
             chat_completion = groq_client.chat.completions.create(
                 messages=[
                     {
@@ -145,25 +110,13 @@ Provide a comprehensive description suitable for semantic search. Be specific an
                     continue
                 return ""
         except Exception as e:
-            error_msg = str(e)
-            if "model_not_found" in error_msg or "not available" in error_msg:
-                print(f"❌ Vision model '{VISION_MODEL}' not available! Skipping vision analysis.")
-                return ""
-            else:
-                if attempt < max_retries - 1:
-                    time.sleep(2)
-                    continue
-                return ""
     return ""
-# -------------------------------
-# 4️⃣ Smart OCR/Vision Extraction for Images
-# -------------------------------
 def extract_text_from_image(img_path):
-    if not ocr_model:
-        return analyze_image_with_vision(img_path=img_path)
     try:
         image = Image.open(img_path).convert("RGB")
         image_np = np.array(image)
@@ -177,90 +130,76 @@ def extract_text_from_image(img_path):
         ocr_text = "\n".join(text)
         if has_substantial_text(ocr_text, min_words=10):
-            print(f"📄 {os.path.basename(img_path)}: Using OCR (text document)")
             return ocr_text
         else:
-            print(f"🖼️  {os.path.basename(img_path)}: Using Vision Model (graph/chart/picture)")
             vision_summary = analyze_image_with_vision(img_path=img_path)
-            return vision_summary if vision_summary else ocr_text
     except Exception as e:
         print(f"❌ Error processing {img_path}: {e}")
         return ""
-# -------------------------------
-# 5️⃣ Extract Text from Plain Text Files
-# -------------------------------
 def extract_text_from_txt(file_path):
     try:
         with open(file_path, 'r', encoding='utf-8') as f:
             text = f.read()
-        print(f"📝 {os.path.basename(file_path)}: Extracted text document")
         return text
     except Exception as e:
         print(f"❌ Error reading text file {file_path}: {e}")
         return ""
-# -------------------------------
-# 6️⃣ Extract Content from PDFs with Vision Analysis
-# -------------------------------
 def extract_content_from_pdf(pdf_path):
     try:
         doc = fitz.open(pdf_path)
         all_content = []
         for page_num, page in enumerate(doc, 1):
             page_content = []
-            # 1. Extract text content
             text = page.get_text()
             if text.strip():
                 page_content.append(f"[Page {page_num} - Text Content]\n{text}")
-            # 2. Vision analysis of the entire page image
-            if groq_client:
                 try:
-                    mat = fitz.Matrix(2, 2)
-                    pix = page.get_pixmap(matrix=mat)
-                    img_data = pix.tobytes("png")
-                    page_image = Image.open(BytesIO(img_data)).convert("RGB")
-                    vision_analysis = analyze_image_with_vision(pil_image=page_image)
-                    if vision_analysis and len(vision_analysis.strip()) > 30:
-                        vision_section = f"[Page {page_num} - Visual Analysis]\n{vision_analysis}"
-                        page_content.append(vision_section)
-                except Exception:
-                    pass # Ignore rendering/analysis errors
-            # 3. OCR on embedded images (if OCR model is loaded)
-            if ocr_model:
-                image_list = page.get_images(full=True)
-                for img_index, img_info in enumerate(image_list, 1):
-                    try:
-                        xref = img_info[0]
-                        base_image = doc.extract_image(xref)
-                        image_bytes = base_image["image"]
-                        image = Image.open(BytesIO(image_bytes)).convert("RGB")
-                        image_np = np.array(image)
-                        result = ocr_model([image_np])
-                        ocr_text = []
-                        for ocr_page in result.pages:
-                            for block in ocr_page.blocks:
-                                for line in block.lines:
-                                    line_text = " ".join([word.value for word in line.words])
-                                    ocr_text.append(line_text)
-                        extracted_text = "\n".join(ocr_text)
-                        if has_substantial_text(extracted_text, min_words=10):
-                            page_content.append(f"[Page {page_num} - Embedded Image {img_index} OCR]\n{extracted_text}")
-                        else:
-                            vision_summary = analyze_image_with_vision(img_bytes=image_bytes)
-                            if vision_summary:
-                                page_content.append(
-                                    f"[Page {page_num} - Embedded Image {img_index} Analysis]\n{vision_summary}")
-                    except Exception:
-                        continue
             if page_content:
                 combined_page = "\n\n---SECTION BREAK---\n\n".join(page_content)
@@ -269,23 +208,18 @@ def extract_content_from_pdf(pdf_path):
         doc.close()
         final_content = "\n\n---PAGE BREAK---\n\n".join(all_content)
         return final_content
     except Exception as e:
         print(f"❌ Error processing PDF {pdf_path}: {e}")
         return ""
-# -------------------------------
-# 7️⃣ Process All Document Types for folder build
-# -------------------------------
 def create_documents_from_folder(folder_path):
     docs = []
     for root, dirs, files in os.walk(folder_path):
         for filename in files:
             full_path = os.path.join(root, filename)
             file_ext = filename.lower().split('.')[-1]
-            text = ""
             if file_ext in ["jpg", "jpeg", "png"]:
                 text = extract_text_from_image(full_path)
             elif file_ext in ["txt", "md"]:
@@ -294,7 +228,7 @@ def create_documents_from_folder(folder_path):
                 text = extract_content_from_pdf(full_path)
             else:
                 continue
             if text.strip():
                 relative_path = os.path.relpath(full_path, folder_path)
                 doc = Document(
@@ -307,22 +241,16 @@ def create_documents_from_folder(folder_path):
                     }
                 )
                 docs.append(doc)
     return docs
-# --- Core RAG/DB Functions (8 to 12) ---
-# -------------------------------
-# 8️⃣ Build or Update QDRANT Store
-# -------------------------------
 def build_or_update_qdrant_store(folder_path):
-    if not QDRANT_API_KEY:
-        return None
     docs = create_documents_from_folder(folder_path)
     if not docs:
         return None
     try:
         vector_store = Qdrant.from_documents(
             docs,
@@ -332,26 +260,15 @@ def build_or_update_qdrant_store(folder_path):
             collection_name=COLLECTION_NAME,
             force_recreate=True
         )
-        print(f"✅ Successfully created/updated Qdrant collection: {COLLECTION_NAME} with {len(docs)} documents.")
         return vector_store
     except Exception as e:
-        print(f"❌ Error connecting or uploading to Qdrant: {e}")
         return None
-# -------------------------------
-# 9️⃣ Query QDRANT Function with Chart-Aware Re-ranking
-# -------------------------------
 def query_qdrant_store(query_text, k=3):
-    if not QDRANT_API_KEY:
-        return []
     try:
-        client = QdrantClient(
-            url=QDRANT_URL,
-            api_key=QDRANT_API_KEY,
-            timeout=20
-        )
         vector_store = Qdrant(
             client=client,
             collection_name=COLLECTION_NAME,
@@ -360,7 +277,7 @@ def query_qdrant_store(query_text, k=3):
     except Exception as e:
         print(f"❌ Error connecting to Qdrant: {e}")
         return []
     initial_k = k * 3
     results = vector_store.similarity_search_with_score(query_text, k=initial_k)
@@ -371,26 +288,15 @@ def query_qdrant_store(query_text, k=3):
         reranked_results = []
         for doc, score in results:
             boost = 0.0
-            if "Visual Analysis]" in doc.page_content or "bar chart" in doc.page_content.lower():
-                visual_content = doc.page_content.lower()
-                if 'bar chart' in query_text.lower() and 'bar chart' in visual_content:
-                    boost += 1.0
-                elif 'pie chart' in query_text.lower() and 'pie chart' in visual_content:
-                    boost += 1.0
-                elif any(kw in query_text.lower() for kw in ['chart', 'graph']) and any(kw in visual_content for kw in ['chart', 'graph', 'plot', 'diagram', 'table']):
-                    boost += 0.5
-                else:
-                    boost += 0.2
             adjusted_score = score - boost
-            reranked_results.append((doc, adjusted_score, score))
         reranked_results.sort(key=lambda x: x[1])
-        results = [(doc, adj_score) for doc, adj_score, _ in reranked_results[:k]]
     else:
         results = results[:k]
     retrieved_docs = []
     for doc, score in results:
         retrieved_docs.append({
@@ -401,54 +307,34 @@ def query_qdrant_store(query_text, k=3):
         })
     return retrieved_docs
-# -------------------------------
-# 10️⃣ Answer Question using Llama 3.3 70B
-# -------------------------------
 def answer_question_with_llm(query_text, retrieved_docs, max_tokens=1000):
-    if not groq_client:
-        return "❌ Groq client not initialized. Cannot generate answer."
     if not retrieved_docs:
-        return "❌ No relevant documents found to answer your question."
     context_parts = []
     for i, doc in enumerate(retrieved_docs, 1):
         source = doc['source']
         content = doc['content']
-        metadata = doc['metadata']
-        timestamp = metadata.get('upload_timestamp')
-        readable_time = time.ctime(float(timestamp)) if timestamp else "N/A"
-        metadata_str = (
-            f"Source: {source}\n"
-            f"File Type: {metadata.get('file_type', 'N/A')}\n"
-            f"Uploaded/Modified: {readable_time}"
-        )
         max_content_length = 2500
         if len(content) > max_content_length:
             content = content[:max_content_length] + "...[truncated]"
-        context_parts.append(
-            f"--- Document {i} ---\n"
-            f"[METADATA]:\n{metadata_str}\n\n"
-            f"[CONTENT]:\n{content}\n"
-        )
     context = "\n".join(context_parts)
     system_prompt = """You are a concise AI assistant. Answer the user's question *only* using the provided documents.
 - Be brief and to the point.
-- If the answer is not in the documents or metadata, simply state 'That information is not available in the documents.'"""
     user_prompt = f"""DOCUMENTS:
 {context}
 QUESTION: {query_text}
-ANSWER: (Provide a concise answer based *only* on the documents)"""
     try:
         response = groq_client.chat.completions.create(
             model=LLM_MODEL,
@@ -458,45 +344,30 @@ ANSWER: (Provide a concise answer based *only* on the documents)"""
             ],
             temperature=0.2,
             max_tokens=max_tokens,
-            top_p=0.9,
         )
-        answer = response.choices[0].message.content
-        return answer
     except Exception as e:
-        return f"❌ Error generating answer: {str(e)}"
-# -------------------------------
-# 11️⃣ Core RAG Response Function
-# -------------------------------
-def get_rag_response(query_text: str, k: int = 3) -> Dict[str, Any]:
-    """Core RAG pipeline: retrieves, generates, and formats response."""
-    print(f"❓ QUERY: {query_text}")
     retrieved_docs = query_qdrant_store(query_text, k=k)
     if not retrieved_docs:
         return {
-            "answer": "❌ No relevant documents found to answer your question. Please upload files first.",
             "sources": []
         }
     answer = answer_question_with_llm(query_text, retrieved_docs)
-    sources_list = [
-        {"source": doc['source'], "score": doc['score']} for doc in retrieved_docs
-    ]
-    response_data = {
         "answer": answer,
         "sources": sources_list
     }
-    return response_data
-# -------------------------------
-# 12️⃣ Core File Processing & Qdrant Addition
-# -------------------------------
-def process_single_file(file_path: str, filename: str) -> Document:
-    """Processes a single file and returns a LangChain Document."""
     file_ext = filename.lower().split('.')[-1]
     text = ""
@@ -506,6 +377,8 @@ def process_single_file(file_path: str, filename: str) -> Document:
         text = extract_text_from_txt(file_path)
     elif file_ext == "pdf":
         text = extract_content_from_pdf(file_path)
     if text.strip():
         doc = Document(
@@ -517,14 +390,14 @@ def process_single_file(file_path: str, filename: str) -> Document:
                 "upload_timestamp": time.time()
             }
         )
         return doc
     return None
-def add_documents_to_qdrant(docs: List[Document]):
-    """Adds a list of processed documents to the Qdrant cloud."""
-    if not QDRANT_API_KEY or not docs:
         return
     try:
         client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
         vector_store = Qdrant(
@@ -533,233 +406,178 @@ def add_documents_to_qdrant(docs: List[Document]):
             embeddings=embedding_model
         )
         vector_store.add_documents(docs)
-        print(f"✅ Successfully added {len(docs)} new document(s) to the cloud.")
     except Exception as e:
-        print(f"❌ Error adding documents to Qdrant: {e}")
-        raise HTTPException(status_code=500, detail=f"Error updating vector store: {e}")
-# -------------------------------
-# 🚀 14. Gradio UI Setup
-# -------------------------------
-def create_gradio_ui():
-    """
-    Creates the Gradio Blocks UI.
-    """
-    def gradio_chat_response_func(message, history):
-        """
-        The function that Gradio's ChatInterface will call.
-        """
-        response_data = get_rag_response(message, k=3)
-        answer = response_data['answer']
-        sources = response_data['sources']
-        sources_md = "\n\n---\n**📚 Sources Used:**\n"
-        for i, doc in enumerate(sources, 1):
-            sources_md += f"* **{doc['source']}** (Score: {doc['score']:.4f})\n"
-        final_response = answer + sources_md
-        return final_response
-    def gradio_upload_func(file_list):
-        """
-        The function that Gradio's Upload button will call.
-        """
-        if not file_list:
-            return "No files uploaded."
-        print("\n" + "=" * 60)
-        print("NEW GRADIO UPLOAD DETECTED: Processing files...")
-        print("=" * 60)
-        docs_to_add = []
-        processed_count = 0
-        failed_count = 0
-        for file_obj in file_list:
-            full_path = file_obj.name
-            filename = os.path.basename(full_path)
-            try:
-                doc = process_single_file(full_path, filename)
-                if doc:
-                    docs_to_add.append(doc)
-                    processed_count += 1
-                else:
-                    failed_count += 1
-            except Exception as e:
-                print(f"❌ Error processing file {filename} from Gradio: {e}")
-                failed_count += 1
-        if docs_to_add:
-            try:
-                add_documents_to_qdrant(docs_to_add)
-            except Exception as e:
-                return f"❌ Error adding documents to vector store: {e}"
-        return f"✅ Processing complete. Added {processed_count} files. Failed: {failed_count}."
-    # Create the Gradio UI using Blocks
-    with gr.Blocks(theme="soft") as demo:
-        gr.Markdown("# 🧠 Multimodal RAG System (Powered by Qdrant Cloud)")
-        with gr.Tabs():
-            # --- CHAT TAB ---
-            with gr.TabItem("Chat with Documents"):
-                gr.ChatInterface(
-                    fn=gradio_chat_response_func,
-                    title="Multimodal RAG Chat",
-                    description="Ask questions about your documents (PDFs, images, text). The system uses Llama 4 Scout for vision and Llama 3.3 70B for answers.",
-                    examples=[
-                        "What documents contain bar charts?",
-                        "Summarize the information about pollution",
-                        "What are the key findings in the environmental report?",
-                        "Describe the graphs showing water quality"
-                    ],
-                )
-            # --- UPLOAD TAB ---
-            with gr.TabItem("Upload New Documents"):
-                gr.Markdown("Upload new PDF, image, or text files to add them to the knowledge base.")
-                # Define components
-                file_uploader = gr.File(
-                    label="Upload Documents",
-                    file_count="multiple",
-                    file_types=["image", ".pdf", ".txt", ".md"],
-                    interactive=True
-                )
-                upload_button = gr.Button("Process and Add Documents", variant="primary")
-                status_output = gr.Markdown("Status: Ready to upload new documents.")
-                # Connect the upload button to the processing function
-                upload_button.click(
-                    fn=gradio_upload_func,
-                    inputs=[file_uploader],
-                    outputs=[status_output]
-                )
-        return demo
 # -------------------------------
-# 🚀 13. FastAPI App Setup
 # -------------------------------
-# Define Pydantic models for API request/response
-class QueryRequest(BaseModel):
-    query: str
-    k: int = 3
-class QueryResponse(BaseModel):
-    answer: str
-    sources: List[Dict[str, Any]]
-class UploadResponse(BaseModel):
-    message: str
-    processed_files: List[str]
-    failed_files: List[str]
-# --- FastAPI App ---
-app = FastAPI(title="🧠 Multimodal RAG API")
-@app.on_event("startup")
-def on_startup():
-    """Checks keys and builds the initial database on server startup."""
-    print("🚀 FastAPI app starting up...")
-    if not os.environ.get("GROQ_API_KEY"):
-        print("⚠️  WARNING: GROQ_API_KEY not set!")
-    if not QDRANT_API_KEY:
-        print("⚠️  WARNING: QDRANT_API_KEY not set! Database functions will fail.")
-    folder = "data"
-    if os.path.exists(folder):
-        build_or_update_qdrant_store(folder)
-    else:
-        print("ℹ️  No 'data' folder found. Skipping initial build.")
-# --- API Endpoints ---
-@app.post("/query/", response_model=QueryResponse)
-async def handle_query(request: QueryRequest):
-    """Executes a RAG query against the vector database."""
     try:
-        response_data = get_rag_response(request.query, request.k)
-        return response_data
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-@app.post("/upload/", response_model=UploadResponse)
-async def handle_upload(files: List[UploadFile] = File(...)):
-    """Uploads one or more files, processes them, and adds them to the vector DB."""
-    if not QDRANT_API_KEY:
-        raise HTTPException(status_code=500, detail="QDRANT_API_KEY is not set. Upload failed.")
     processed_files = []
     failed_files = []
     docs_to_add = []
     for file in files:
-        tmp_path = None
         try:
-            with tempfile.NamedTemporaryFile(delete=False, suffix=f"_{file.filename}") as tmp:
-                shutil.copyfileobj(file.file, tmp)
                 tmp_path = tmp.name
-            doc = process_single_file(tmp_path, file.filename)
             if doc:
                 docs_to_add.append(doc)
-                processed_files.append(file.filename)
             else:
-                failed_files.append(file.filename)
         except Exception as e:
             failed_files.append(file.filename)
-        finally:
-            if tmp_path and os.path.exists(tmp_path):
-                os.unlink(tmp_path)
-            file.file.close()
     if docs_to_add:
         try:
             add_documents_to_qdrant(docs_to_add)
-        except HTTPException:
-            failed_files.extend(processed_files)
-            processed_files = []
-    return {
-        "message": f"Processing complete. Added {len(processed_files)} file(s) to the database.",
         "processed_files": processed_files,
         "failed_files": failed_files
-    }
 # -------------------------------
-# 🚀 15. Create and Mount the Apps
 # -------------------------------
-@app.get("/")
-def redirect_to_ui():
-    """Redirect root to the Gradio UI"""
-    return RedirectResponse(url="/ui")
-@app.get("/api")
-def api_info():
-    """API information endpoint"""
-    return {
-        "message": "Welcome to the Multimodal RAG API",
-        "endpoints": {
-            "ui": "/ui - Gradio interface",
-            "query": "POST /query/ - Execute RAG queries",
-            "upload": "POST /upload/ - Upload and process files"
-        }
-    }
-# Create the Gradio UI
-gradio_ui = create_gradio_ui()
-# Mount the Gradio UI at /ui path
-app = gr.mount_gradio_app(app, gradio_ui, path="/ui")

 from groq import Groq
 import base64
 from io import BytesIO
+import fitz  # PyMuPDF
 import time
 import shutil
+# Flask imports
+from flask import Flask, request, jsonify
+from werkzeug.utils import secure_filename
+import tempfile
+# Qdrant imports
 from langchain_qdrant import Qdrant
 from qdrant_client import QdrantClient
 # -------------------------------
+# Configuration
 # -------------------------------
 device = "cuda" if torch.cuda.is_available() else "cpu"
+ocr_model = ocr_predictor(pretrained=True).to(device)
 embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 VISION_MODEL = "meta-llama/llama-4-scout-17b-16e-instruct"
 LLM_MODEL = "llama-3.3-70b-versatile"
+QDRANT_URL = "https://bdf142ef-7e2a-433b-87a0-301ff303e3af.us-east4-0.gcp.cloud.qdrant.io:6333"
 QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
 COLLECTION_NAME = "multimodal_rag_store"
 # -------------------------------
+# Helper Functions
 # -------------------------------
 def has_substantial_text(text, min_words=10):
     words = text.split()
     return len(words) >= min_words
 def analyze_image_with_vision(img_path=None, img_bytes=None, pil_image=None, max_retries=3):
     for attempt in range(max_retries):
         try:
             if pil_image:
                 buffered = BytesIO()
                 pil_image.save(buffered, format="PNG")
                 img_data = buffered.getvalue()
+                img_format = "png"
             elif img_path:
                 with open(img_path, "rb") as img_file:
                     img_data = img_file.read()
                 img_format = img_path.lower().split('.')[-1]
             elif img_bytes:
                 img_data = img_bytes
+                img_format = "png"
             else:
                 return ""
             vision_prompt = """Analyze this image carefully and provide a detailed description:
 1. IDENTIFY THE TYPE: Is this a chart, graph, table, diagram, photograph, or text document?
 2. IF IT'S A CHART/GRAPH/TABLE:
+   - Specify the exact type (bar chart, pie chart, line graph, scatter plot, table, etc.)
+   - List ALL categories/labels shown
+   - Describe the data values and trends
+   - Mention axis labels, title, legend if present
+   - Highlight key insights or patterns
 3. IF IT'S A PHOTOGRAPH/DIAGRAM:
+   - Describe what you see in detail
+   - Identify key objects, people, or concepts
+   - Note any text visible in the image
 4. IF IT'S A TEXT DOCUMENT:
+   - Summarize the main content and structure
 Provide a comprehensive description suitable for semantic search. Be specific and detailed."""
             chat_completion = groq_client.chat.completions.create(
                 messages=[
                     {
                     continue
                 return ""
         except Exception as e:
+            if attempt < max_retries - 1:
+                time.sleep(2)
+                continue
+            return ""
     return ""
 def extract_text_from_image(img_path):
     try:
         image = Image.open(img_path).convert("RGB")
         image_np = np.array(image)
         ocr_text = "\n".join(text)
         if has_substantial_text(ocr_text, min_words=10):
+            print(f"📄 {os.path.basename(img_path)}: Using OCR")
             return ocr_text
         else:
+            print(f"🖼️  {os.path.basename(img_path)}: Using Vision Model")
             vision_summary = analyze_image_with_vision(img_path=img_path)
+            return vision_summary if vision_summary else ocr_text
     except Exception as e:
         print(f"❌ Error processing {img_path}: {e}")
         return ""
 def extract_text_from_txt(file_path):
     try:
         with open(file_path, 'r', encoding='utf-8') as f:
             text = f.read()
+        print(f"📝 {os.path.basename(file_path)}: Extracted text")
         return text
     except Exception as e:
         print(f"❌ Error reading text file {file_path}: {e}")
         return ""
 def extract_content_from_pdf(pdf_path):
     try:
         doc = fitz.open(pdf_path)
         all_content = []
         for page_num, page in enumerate(doc, 1):
             page_content = []
             text = page.get_text()
             if text.strip():
                 page_content.append(f"[Page {page_num} - Text Content]\n{text}")
+            try:
+                mat = fitz.Matrix(2, 2)
+                pix = page.get_pixmap(matrix=mat)
+                img_data = pix.tobytes("png")
+                page_image = Image.open(BytesIO(img_data)).convert("RGB")
+                vision_analysis = analyze_image_with_vision(pil_image=page_image)
+                if vision_analysis and len(vision_analysis.strip()) > 30:
+                    page_content.append(f"[Page {page_num} - Visual Analysis]\n{vision_analysis}")
+            except Exception as e:
+                print(f"❌ Error rendering page {page_num}: {e}")
+            image_list = page.get_images(full=True)
+            for img_index, img_info in enumerate(image_list, 1):
                 try:
+                    xref = img_info[0]
+                    base_image = doc.extract_image(xref)
+                    image_bytes = base_image["image"]
+                    image = Image.open(BytesIO(image_bytes)).convert("RGB")
+                    image_np = np.array(image)
+                    result = ocr_model([image_np])
+                    ocr_text = []
+                    for ocr_page in result.pages:
+                        for block in ocr_page.blocks:
+                            for line in block.lines:
+                                line_text = " ".join([word.value for word in line.words])
+                                ocr_text.append(line_text)
+                    extracted_text = "\n".join(ocr_text)
+                    if has_substantial_text(extracted_text, min_words=10):
+                        page_content.append(f"[Page {page_num} - Embedded Image {img_index} OCR]\n{extracted_text}")
+                    else:
+                        vision_summary = analyze_image_with_vision(img_bytes=image_bytes)
+                        if vision_summary:
+                            page_content.append(f"[Page {page_num} - Embedded Image {img_index} Analysis]\n{vision_summary}")
+                except Exception as e:
+                    print(f"❌ Error processing embedded image {img_index}: {e}")
+                    continue
             if page_content:
                 combined_page = "\n\n---SECTION BREAK---\n\n".join(page_content)
         doc.close()
         final_content = "\n\n---PAGE BREAK---\n\n".join(all_content)
         return final_content
     except Exception as e:
         print(f"❌ Error processing PDF {pdf_path}: {e}")
         return ""
 def create_documents_from_folder(folder_path):
     docs = []
     for root, dirs, files in os.walk(folder_path):
         for filename in files:
             full_path = os.path.join(root, filename)
             file_ext = filename.lower().split('.')[-1]
+            text = ""
             if file_ext in ["jpg", "jpeg", "png"]:
                 text = extract_text_from_image(full_path)
             elif file_ext in ["txt", "md"]:
                 text = extract_content_from_pdf(full_path)
             else:
                 continue
             if text.strip():
                 relative_path = os.path.relpath(full_path, folder_path)
                 doc = Document(
                     }
                 )
                 docs.append(doc)
+                print(f"✅ Added {filename}")
     return docs
 def build_or_update_qdrant_store(folder_path):
+    print("\n🔄 Building Qdrant collection...")
     docs = create_documents_from_folder(folder_path)
     if not docs:
+        print("⚠️  No valid documents found!")
         return None
     try:
         vector_store = Qdrant.from_documents(
             docs,
             collection_name=COLLECTION_NAME,
             force_recreate=True
         )
+        print(f"✅ Created collection with {len(docs)} documents")
         return vector_store
     except Exception as e:
+        print(f"❌ Error with Qdrant: {e}")
         return None
 def query_qdrant_store(query_text, k=3):
     try:
+        client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY, timeout=20)
         vector_store = Qdrant(
             client=client,
             collection_name=COLLECTION_NAME,
     except Exception as e:
         print(f"❌ Error connecting to Qdrant: {e}")
         return []
     initial_k = k * 3
     results = vector_store.similarity_search_with_score(query_text, k=initial_k)
         reranked_results = []
         for doc, score in results:
             boost = 0.0
+            if "Visual Analysis]" in doc.page_content:
+                boost += 0.5
             adjusted_score = score - boost
+            reranked_results.append((doc, adjusted_score))
         reranked_results.sort(key=lambda x: x[1])
+        results = reranked_results[:k]
     else:
         results = results[:k]
     retrieved_docs = []
     for doc, score in results:
         retrieved_docs.append({
         })
     return retrieved_docs
 def answer_question_with_llm(query_text, retrieved_docs, max_tokens=1000):
     if not retrieved_docs:
+        return "❌ No relevant documents found."
     context_parts = []
     for i, doc in enumerate(retrieved_docs, 1):
         source = doc['source']
         content = doc['content']
         max_content_length = 2500
         if len(content) > max_content_length:
             content = content[:max_content_length] + "...[truncated]"
+        context_parts.append(f"--- Document {i} ---\nSource: {source}\n\n{content}\n")
     context = "\n".join(context_parts)
     system_prompt = """You are a concise AI assistant. Answer the user's question *only* using the provided documents.
 - Be brief and to the point.
+- If the answer is not in the documents, state 'That information is not available in the documents.'"""
     user_prompt = f"""DOCUMENTS:
 {context}
 QUESTION: {query_text}
+ANSWER:"""
     try:
         response = groq_client.chat.completions.create(
             model=LLM_MODEL,
             ],
             temperature=0.2,
             max_tokens=max_tokens,
         )
+        return response.choices[0].message.content
     except Exception as e:
+        return f"❌ Error: {str(e)}"
def get_rag_response(query_text, k=3):
    """Answer one question: retrieve documents, then ask the LLM.

    Args:
        query_text: The user's question.
        k: Number of documents requested from ``query_qdrant_store``.

    Returns:
        A dict with two keys: "answer" (the text to show the user) and
        "sources" (a list of ``{"source", "score"}`` dicts, empty when
        retrieval returned nothing).
    """
    print(f"\n❓ Query: {query_text}")
    hits = query_qdrant_store(query_text, k=k)

    if hits:
        reply = answer_question_with_llm(query_text, hits)
        cited = []
        for hit in hits:
            cited.append({"source": hit['source'], "score": hit['score']})
        return {"answer": reply, "sources": cited}

    # Nothing retrieved: skip the LLM call entirely.
    return {"answer": "❌ No relevant documents found.", "sources": []}
+def process_single_file(file_path, filename):
     file_ext = filename.lower().split('.')[-1]
     text = ""
         text = extract_text_from_txt(file_path)
     elif file_ext == "pdf":
         text = extract_content_from_pdf(file_path)
+    else:
+        return None
     if text.strip():
         doc = Document(
                 "upload_timestamp": time.time()
             }
         )
+        print(f"✅ Processed {filename}")
         return doc
     return None
+def add_documents_to_qdrant(docs):
+    if not docs:
         return
     try:
         client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
         vector_store = Qdrant(
             embeddings=embedding_model
         )
         vector_store.add_documents(docs)
+        print(f"✅ Added {len(docs)} documents to Qdrant")
     except Exception as e:
+        print(f"❌ Error adding to Qdrant: {e}")
+        raise
 # -------------------------------
+# Flask App Setup
 # -------------------------------
+flask_app = Flask(__name__)
@flask_app.route('/api/query', methods=['POST'])
def handle_query():
    """Answer a question posted as JSON.

    Expected body: ``{"query": "<question>", "k": <positive int, optional>}``.
    ``k`` defaults to 3.

    Returns:
        200 with the dict produced by ``get_rag_response``;
        400 with ``{"error": ...}`` when the body is not a JSON object, the
        query is missing/blank, or ``k`` is not a positive integer;
        500 with ``{"error": ...}`` when answering fails.
    """
    # silent=True yields None instead of raising for a missing or malformed
    # JSON body, so bad input is reported as a JSON 400 rather than crashing
    # on `data.get` below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    query = data.get('query', '')
    if not isinstance(query, str) or not query.strip():
        return jsonify({"error": "No query provided"}), 400

    k = data.get('k', 3)
    # bool is a subclass of int; reject it explicitly so `true` is not read as 1.
    if isinstance(k, bool) or not isinstance(k, int) or k < 1:
        return jsonify({"error": "'k' must be a positive integer"}), 400

    try:
        response_data = get_rag_response(query, k)
        return jsonify(response_data)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
@flask_app.route('/api/upload', methods=['POST'])
def handle_upload():
    """Ingest uploaded files into the vector store.

    Expects a multipart form with one or more parts named ``files``. Each
    file is written to a temporary path, converted with
    ``process_single_file`` and, if that yields a document, queued for
    ``add_documents_to_qdrant``. The temporary file is always removed.

    Returns:
        400 with ``{"error": ...}`` when the form has no ``files`` part;
        500 with ``{"error": ...}`` when writing to the database fails;
        otherwise 200 with ``message``, ``processed_files`` and
        ``failed_files``.
    """
    if 'files' not in request.files:
        return jsonify({"error": "No files provided"}), 400

    files = request.files.getlist('files')
    processed_files = []
    failed_files = []
    docs_to_add = []

    for file in files:
        if file.filename == '':
            continue

        filename = secure_filename(file.filename)
        # secure_filename can reduce a hostile name to ""; fall back to the
        # submitted name so the failure report still identifies the file.
        label = filename or file.filename
        tmp_path = None
        doc = None
        try:
            # Reserve a unique path, then close the handle before saving:
            # writing to a path that is still open fails on some platforms.
            with tempfile.NamedTemporaryFile(delete=False, suffix=filename) as tmp:
                tmp_path = tmp.name
            file.save(tmp_path)
            doc = process_single_file(tmp_path, filename)
        except Exception as e:
            print(f"❌ Error: {e}")
        finally:
            # Clean up on success and failure alike; a cleanup problem must
            # not change whether the file counts as processed.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError as cleanup_error:
                    print(f"⚠️  Could not remove temp file {tmp_path}: {cleanup_error}")

        if doc:
            docs_to_add.append(doc)
            processed_files.append(label)
        else:
            failed_files.append(label)

    if docs_to_add:
        try:
            add_documents_to_qdrant(docs_to_add)
        except Exception as e:
            return jsonify({"error": f"Failed to add to database: {str(e)}"}), 500

    return jsonify({
        "message": f"Processed {len(processed_files)} files",
        "processed_files": processed_files,
        "failed_files": failed_files
    })
@flask_app.route('/api/health', methods=['GET'])
def health_check():
    """Liveness probe: report that the API process is responding."""
    payload = {"status": "ok", "message": "API is running"}
    return jsonify(payload)
 # -------------------------------
+# Gradio UI
 # -------------------------------
def gradio_chat_response(message, history):
    """Chat callback: answer ``message`` and append a Markdown source list.

    Args:
        message: The text the user typed.
        history: Prior conversation supplied by the chat UI; not used.

    Returns:
        The answer followed by a "Sources" section with one bullet per
        retrieved document and its score formatted to four decimals.
    """
    result = get_rag_response(message, k=3)
    reply = result['answer']
    bullets = [
        f"* {entry['source']} (Score: {entry['score']:.4f})\n"
        for entry in result['sources']
    ]
    return reply + "\n\n---\n**📚 Sources:**\n" + "".join(bullets)
def gradio_upload(file_list):
    """Upload-button callback: ingest the selected files.

    Args:
        file_list: Uploaded file objects from the file picker; each must
            expose its path on disk as ``.name``. May be empty or None.

    Returns:
        A one-line status string: a summary of processed/failed counts, a
        notice when nothing was uploaded, or the database error message.
    """
    if not file_list:
        return "No files uploaded."

    new_docs = []
    failed = 0
    for upload in file_list:
        path = upload.name
        base = os.path.basename(path)
        try:
            doc = process_single_file(path, base)
        except Exception as e:
            print(f"❌ Error: {e}")
            doc = None
        # A raised error and an empty result both count as a failure.
        if doc:
            new_docs.append(doc)
        else:
            failed += 1

    processed = len(new_docs)
    if new_docs:
        try:
            add_documents_to_qdrant(new_docs)
        except Exception as e:
            return f"❌ Error: {e}"

    return f"✅ Processed {processed} files. Failed: {failed}."
+with gr.Blocks(theme="soft") as gradio_ui:
     # Two-tab UI bound to the name `gradio_ui`: a chat tab backed by
     # gradio_chat_response and an upload tab backed by gradio_upload.
+    gr.Markdown("# 🧠 Multimodal RAG System")
+    with gr.Tabs():
+        with gr.TabItem("💬 Chat"):
             # Chat tab: each user message is passed to gradio_chat_response.
+            gr.ChatInterface(
+                fn=gradio_chat_response,
+                title="Chat with Documents",
+                description="Ask questions about your documents",
+                examples=[
+                    "What documents contain bar charts?",
+                    "Summarize the environmental report",
+                    "What are the key findings?"
+                ]
+            )
+        with gr.TabItem("📤 Upload"):
             # Upload tab: multi-file picker limited to images, PDF, .txt and .md.
+            gr.Markdown("Upload new documents to the knowledge base")
+            file_uploader = gr.File(
+                label="Upload Documents",
+                file_count="multiple",
+                file_types=["image", ".pdf", ".txt", ".md"]
+            )
+            upload_btn = gr.Button("Process Documents", variant="primary")
+            status = gr.Markdown("Ready to upload.")
             # Clicking the button sends the selected files to gradio_upload and
             # shows the string it returns in the `status` Markdown element.
+            upload_btn.click(fn=gradio_upload, inputs=[file_uploader], outputs=[status])
+# -------------------------------
+# Initialize and Run
+# -------------------------------
if __name__ == "__main__":
    # Script entry point: optionally (re)build the vector store from ./data,
    # then serve the Gradio UI and the Flask JSON API from one process on
    # port 7860.
    #
    # Gradio is an ASGI application and Blocks.launch() returns a tuple while
    # starting its own server, so it cannot be placed under Werkzeug's WSGI
    # dispatcher. Instead a FastAPI app hosts both: Flask is adapted through
    # WSGIMiddleware and Gradio is mounted with gr.mount_gradio_app. The
    # Werkzeug interactive debugger is deliberately not enabled because the
    # server listens on all interfaces.
    import uvicorn
    from fastapi import FastAPI
    from fastapi.middleware.wsgi import WSGIMiddleware

    print("🚀 Starting Multimodal RAG System...")

    # Build initial database if data folder exists
    folder = "data"
    if os.path.exists(folder):
        print(f"\n📂 Found '{folder}' folder, building database...")
        build_or_update_qdrant_store(folder)

    def _flask_api_with_prefix(environ, start_response):
        """Restore the mount prefix before handing the request to Flask.

        Mounting at /api moves that prefix into SCRIPT_NAME, but the Flask
        routes are registered with the full '/api/...' path, so the prefix
        is put back into PATH_INFO for URL matching.
        """
        environ["PATH_INFO"] = environ.get("SCRIPT_NAME", "") + environ.get("PATH_INFO", "")
        environ["SCRIPT_NAME"] = ""
        return flask_app(environ, start_response)

    application = FastAPI()
    # Register the API mount first so it takes precedence over the UI at "/".
    application.mount("/api", WSGIMiddleware(_flask_api_with_prefix))
    application = gr.mount_gradio_app(application, gradio_ui, path="/")

    print("\n✅ Server starting on http://0.0.0.0:7860")
    print("   - Gradio UI: http://0.0.0.0:7860")
    print("   - Flask API: http://0.0.0.0:7860/api/query")
    print("   - Health Check: http://0.0.0.0:7860/api/health")

    uvicorn.run(application, host="0.0.0.0", port=7860)

git ADDED Viewed

File without changes

requirements.txt CHANGED Viewed

@@ -12,6 +12,7 @@ sentence-transformers==5.1.2
 langchain-qdrant==1.1.0
 qdrant-client==1.15.1
 fastapi
 uvicorn

 langchain-qdrant==1.1.0
 qdrant-client==1.15.1
+flask
 fastapi
 uvicorn