jk12p committed on
Commit
5add394
·
verified ·
1 Parent(s): 6297f96

Create app.py

Files changed (1)
  1. app.py +83 -0
app.py ADDED
@@ -0,0 +1,83 @@
+ import streamlit as st
+ import torch
+ import fitz  # PyMuPDF
+ from sentence_transformers import SentenceTransformer
+ import faiss
+ import numpy as np
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ # --- CONFIG ---
+ HF_TOKEN = "your_huggingface_token_here"  # Add your Hugging Face token (Gemma is a gated model)
+
+ # Load tokenizer and model with optimizations
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
+ model = AutoModelForCausalLM.from_pretrained(
+     "google/gemma-2b-it", token=HF_TOKEN,
+     torch_dtype=torch.float16,  # Use half-precision weights to reduce memory
+     device_map="auto"           # Place the model on the best available device (CPU/GPU)
+ )
+
+ # Load sentence-transformer model for embedding generation
+ embedder = SentenceTransformer("all-MiniLM-L6-v2")
+
+ # --- UI ---
+ st.title("🔍 RAG App using 🤖 Gemma 2B")
+
+ uploaded_file = st.file_uploader("📄 Upload a PDF or TXT file", type=["pdf", "txt"])
+
+ # Extract text from the uploaded file (PDF/TXT)
+ def extract_text(file):
+     text = ""
+     if file.type == "application/pdf":
+         doc = fitz.open(stream=file.read(), filetype="pdf")
+         for page in doc:
+             text += page.get_text()
+     elif file.type == "text/plain":
+         text = file.read().decode("utf-8")
+     return text
+
+ # Split text into fixed-size chunks for indexing
+ def split_into_chunks(text, chunk_size=500):
+     return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
+
+ # Create a FAISS index over the chunk embeddings for fast retrieval
+ def create_faiss_index(chunks):
+     embeddings = embedder.encode(chunks)
+     dim = embeddings.shape[1]
+     index = faiss.IndexFlatL2(dim)
+     index.add(np.array(embeddings))
+     return index, embeddings
+
+ # Retrieve the top-k chunks most relevant to the query
+ def retrieve_chunks(query, chunks, index, embeddings, k=3):
+     query_embedding = embedder.encode([query])
+     D, I = index.search(np.array(query_embedding), k)
+     return [chunks[i] for i in I[0]]
+
+ # --- MAIN LOGIC ---
+ if uploaded_file:
+     st.success("✅ File uploaded successfully!")
+     raw_text = extract_text(uploaded_file)
+     chunks = split_into_chunks(raw_text)
+
+     st.info(f"📚 Document split into {len(chunks)} chunks")
+
+     index, embeddings = create_faiss_index(chunks)
+
+     user_question = st.text_input("💬 Ask something about the document:")
+
+     if user_question:
+         with st.spinner("Thinking..."):
+             context = "\n".join(retrieve_chunks(user_question, chunks, index, embeddings))
+
+             # Generate a response from Gemma 2B conditioned on the retrieved context
+             input_ids = tokenizer.encode(f"Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {user_question}\nAnswer:", return_tensors="pt").to(model.device)
+
+             with torch.no_grad():  # Disable gradient computation for inference
+                 outputs = model.generate(input_ids, max_new_tokens=256, do_sample=True, temperature=0.7)
+
+             generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+             answer = generated_text.split("Answer:")[-1].strip()
+
+             st.markdown("### 🧠 Answer:")
+             st.success(answer)
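
Note (not part of the commit): Streamlit re-executes the whole script on every user interaction, so as written the Gemma weights and the sentence-transformer are reloaded on each rerun. A minimal sketch of how the heavy loads could be cached with st.cache_resource is below; it assumes a recent Streamlit (>= 1.18, where st.cache_resource exists) and reuses the same model IDs as app.py, and the helper names load_generator / load_embedder are hypothetical.

import streamlit as st
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM

@st.cache_resource  # loaded once per process; later reruns reuse the same objects
def load_generator(hf_token: str):
    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        "google/gemma-2b-it", token=hf_token,
        torch_dtype=torch.float16, device_map="auto"
    )
    return tokenizer, model

@st.cache_resource
def load_embedder():
    return SentenceTransformer("all-MiniLM-L6-v2")

# Possible usage near the top of app.py:
#   tokenizer, model = load_generator(HF_TOKEN)
#   embedder = load_embedder()

st.cache_resource (rather than st.cache_data) is the appropriate decorator here because model and tokenizer objects are not serializable and should be shared as-is across reruns and sessions.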