Spaces:

Saint5
/

multimodal_rag_system

Sleeping

App Files Files Community

Saint5 committed on Aug 21, 2025

Commit

8cb5b3d

verified ·

1 Parent(s): 0a3175d

Uploading Multimodal Retrieval-Augmented Generation System.

Browse files

Files changed (5) hide show

README.md +1 -1
app.py +14 -12
main.py +4 -5
model_setup.py +2 -2
utils.py +2 -2

README.md CHANGED Viewed

@@ -24,4 +24,4 @@ A **Multimodal Retrieval-Augmented Generation (RAG) system** that allows users t
 - Streams answers from the LLM using Gradio interface.
 - Efficient memory usage with bitsandbytes 4-bit quantization.
-The **[google/gemma-3-4b-it](https://huggingface.co/google/gemma-3-4b-it)** is both used to generate image descriptions for the extracted images and for text generation for the RAG system.

 - Streams answers from the LLM using Gradio interface.
 - Efficient memory usage with bitsandbytes 4-bit quantization.
+The **[google/gemma-3-4b-it](https://huggingface.co/google/gemma-3-4b-it)** is both used to generate image descriptions for the extracted images and for text generation for the RAG system.

app.py CHANGED Viewed

@@ -5,7 +5,9 @@ import os
 import hashlib
 import torch
 import gradio as gr
 from model_setup import embedding_model, model, processor
 from main import preprocess_pdf, semantic_search, generate_answer_stream
@@ -30,7 +32,7 @@ state = {
 def _make_cache_names(pdf_path: str) -> tuple[str, str]:
     """Generate unique cache file names per PDF based on hash of filename."""
-    pdf_hash = hashlib.md5(pdf_path.encode()).hexdigest[:8]  # Shorten for readability
     base_name = os.path.splitext(os.path.basename(pdf_path))[0]
     index_file = os.path.join(CACHE_DIR, f"{base_name}_{pdf_hash}_index.faiss")
     chunks_file = os.path.join(CACHE_DIR, f"{base_name}_{pdf_hash}_chunks.json")
@@ -40,7 +42,10 @@ def handle_pdf_upload(file):
     if file is None:
         return "[ERROR ⚠️] No file uploaded.", gr.update()
-    new_pdf_path = file.name
     state["pdf_path"] = new_pdf_path
     # Create unique cache file names for this PDF
@@ -56,6 +61,7 @@ def handle_pdf_upload(file):
         use_cache=True # allow cache for the PDF
     )
     state["index"], state["chunks"] = index, chunks
     # Store in processed_pdfs for later selection
     pdf_key = os.path.basename(state["pdf_path"])
@@ -71,18 +77,13 @@ def handle_pdf_selection(pdf_name):
     if pdf_name not in state["processed_pdfs"]:
         return "[ERROR] Selected PDF not found in cache."
-    state["pdf_path"] = pdf_name
     state["index_file"], state["chunks_file"] = state["processed_pdfs"][pdf_name]
     # Reload index + chunks from cache
-    index, chunks = preprocess_pdf(
-        file_path=pdf_name,
-        image_dir=state["image_dir"],
-        embedding_model=embedding_model,
-        index_file=state["index_file"],
-        chunks_file=state["chunks_file"],
-        use_cache=True
-    )
     state["index"], state["chunks"] = index, chunks
     return f"📂 Switched to cached PDF: {pdf_name}"
@@ -93,6 +94,7 @@ def chat_streaming(message, history):
     # Perform semantic search
     retrieved_chunks = semantic_search(message, embedding_model, state["index"], state["chunks"], top_k=10)
     # Stream the answer
     for partial in generate_answer_stream(message, retrieved_chunks, model, processor):
@@ -111,7 +113,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         file_input = gr.File(label="📂Upload PDF")
-        upload_button = gr.Button("Process PDF")
     upload_status = gr.Textbox(label="Upload Status", interactive=False)
     pdf_selector = gr.Dropdown(label="📄 Select a Processed PDF", choices=[], interactive=True)

 import hashlib
 import torch
 import gradio as gr
+# import gc
+from utils import load_faiss_index, load_cache
 from model_setup import embedding_model, model, processor
 from main import preprocess_pdf, semantic_search, generate_answer_stream
 def _make_cache_names(pdf_path: str) -> tuple[str, str]:
     """Generate unique cache file names per PDF based on hash of filename."""
+    pdf_hash = hashlib.md5(pdf_path.encode()).hexdigest()[:8]  # Shorten for readability
     base_name = os.path.splitext(os.path.basename(pdf_path))[0]
     index_file = os.path.join(CACHE_DIR, f"{base_name}_{pdf_hash}_index.faiss")
     chunks_file = os.path.join(CACHE_DIR, f"{base_name}_{pdf_hash}_chunks.json")
     if file is None:
         return "[ERROR ⚠️] No file uploaded.", gr.update()
+    # Save uploaded file to cache directory to ensure accessibility
+    new_pdf_path = os.path.join(CACHE_DIR, file.name)
+    with open(new_pdf_path, "wb") as f_out:
+      f_out.write(file.read())
     state["pdf_path"] = new_pdf_path
     # Create unique cache file names for this PDF
         use_cache=True # allow cache for the PDF
     )
     state["index"], state["chunks"] = index, chunks
+    # gc.collect() # Free memory after PDF processing
     # Store in processed_pdfs for later selection
     pdf_key = os.path.basename(state["pdf_path"])
     if pdf_name not in state["processed_pdfs"]:
         return "[ERROR] Selected PDF not found in cache."
+    state["pdf_path"] = os.path.join(CACHE_DIR, pdf_name)
     state["index_file"], state["chunks_file"] = state["processed_pdfs"][pdf_name]
     # Reload index + chunks from cache
+    index = load_faiss_index(state["index_file"])
+    chunks = load_cache(state["chunks_file"])
     state["index"], state["chunks"] = index, chunks
     return f"📂 Switched to cached PDF: {pdf_name}"
     # Perform semantic search
     retrieved_chunks = semantic_search(message, embedding_model, state["index"], state["chunks"], top_k=10)
+    # gc.collect() # Free memory after semantic search
     # Stream the answer
     for partial in generate_answer_stream(message, retrieved_chunks, model, processor):
     with gr.Row():
         file_input = gr.File(label="📂Upload PDF")
+        # upload_button = gr.Button("Process PDF")
     upload_status = gr.Textbox(label="Upload Status", interactive=False)
     pdf_selector = gr.Dropdown(label="📄 Select a Processed PDF", choices=[], interactive=True)

main.py CHANGED Viewed

@@ -9,10 +9,9 @@ import re
 import gc
 import numpy as np
-# from time import time
 from typing import List, Dict, Tuple
 from PIL import Image
-# from threading import Thread
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from transformers import TextIteratorStreamer
@@ -106,8 +105,8 @@ def generate_image_descriptions(image_paths):
       captions.append({"image_path": image_path, "caption": "<---image---> (Captioning failed)"}) # Add a placeholder caption
       continue
     finally:
-      clear_gpu_cache()
       gc.collect()
   return captions
 # Cleaning the captions from the extracted images
@@ -301,13 +300,13 @@ def generate_answer_stream(query, retrieved_chunks, model, processor):
   with torch.inference_mode():
     model.generate(**inputs, streamer=streamer, use_cache=True, max_new_tokens=512)
     gc.collect() # Free memory after model generation
   accumulated = ""
   for new_text in streamer:
     # time.sleep(0.2)
     accumulated += new_text
     yield accumulated
-  # Free memory after streaming
   clear_gpu_cache()
   gc.collect()

 import gc
 import numpy as np
 from typing import List, Dict, Tuple
 from PIL import Image
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from transformers import TextIteratorStreamer
       captions.append({"image_path": image_path, "caption": "<---image---> (Captioning failed)"}) # Add a placeholder caption
       continue
     finally:
       gc.collect()
+      clear_gpu_cache()
   return captions
 # Cleaning the captions from the extracted images
   with torch.inference_mode():
     model.generate(**inputs, streamer=streamer, use_cache=True, max_new_tokens=512)
     gc.collect() # Free memory after model generation
   accumulated = ""
   for new_text in streamer:
     # time.sleep(0.2)
     accumulated += new_text
     yield accumulated
+  # Free memory after streaming is complete
   clear_gpu_cache()
   gc.collect()

model_setup.py CHANGED Viewed

@@ -7,6 +7,7 @@ import gc
 from sentence_transformers import SentenceTransformer
 from transformers import AutoProcessor, Gemma3ForConditionalGeneration, BitsAndBytesConfig
 from utils import clear_gpu_cache
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Embedding model
@@ -35,6 +36,5 @@ model.eval()
 # Processor
 processor = AutoProcessor.from_pretrained(model_name, use_fast=True)
-# Free memory
 clear_gpu_cache()
-gc.collect()

 from sentence_transformers import SentenceTransformer
 from transformers import AutoProcessor, Gemma3ForConditionalGeneration, BitsAndBytesConfig
 from utils import clear_gpu_cache
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Embedding model
 # Processor
 processor = AutoProcessor.from_pretrained(model_name, use_fast=True)
 clear_gpu_cache()
+gc.collect()

utils.py CHANGED Viewed

@@ -3,10 +3,10 @@
 import os
 import gc
 import json
 from typing import List, Dict
 import faiss
 import numpy as np
-import torch
 def save_cache(data: List[Dict], filepath: str) -> None:
   """Saving the chunks and the embeddings for easy retrieval in .json format"""
@@ -61,7 +61,7 @@ def cleanup_images(image_dir: str):
   except Exception as e:
     print(f"[WARNING] Failed to delete some images in {image_dir}: {e}")
-# Just being agnostic because this space may only be using CPU but why not?
 def clear_gpu_cache():
   """Clear GPU cache and run garbage collection(saving on memory)."""
   if torch.cuda.is_available():

 import os
 import gc
 import json
+import torch
 from typing import List, Dict
 import faiss
 import numpy as np
 def save_cache(data: List[Dict], filepath: str) -> None:
   """Saving the chunks and the embeddings for easy retrieval in .json format"""
   except Exception as e:
     print(f"[WARNING] Failed to delete some images in {image_dir}: {e}")
+# Just being agnostic because my space may only be using CPU but why not?
 def clear_gpu_cache():
   """Clear GPU cache and run garbage collection(saving on memory)."""
   if torch.cuda.is_available():