Spaces:

AamerAkhter
/

RAG_Prac_2

Sleeping

App Files Files Community

AamerAkhter committed on May 26, 2025

Commit

22bec1e

verified ·

1 Parent(s): 261053d

Create app.py

Browse files

Files changed (1) hide show

app.py +78 -0

app.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import os
+import fitz  # PyMuPDF
+import streamlit as st
+from groq import Groq
+from sentence_transformers import SentenceTransformer
+import numpy as np
+import faiss
+# Set GROQ API key
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+client = Groq(api_key=GROQ_API_KEY)
+# Load embedding model
+embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
+# --- Functions ---
+# Extract text from uploaded PDF
+def extract_text(file):
+    text = ""
+    with fitz.open(stream=file.read(), filetype="pdf") as doc:
+        for page in doc:
+            text += page.get_text()
+    return text
+# Split text into chunks
+def chunk_text(text, chunk_size=500):
+    words = text.split()
+    return [" ".join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]
+# Generate embeddings
+def embed_chunks(chunks):
+    return embedding_model.encode(chunks)
+# Create FAISS index
+def create_faiss_index(embeddings):
+    dim = embeddings.shape[1]
+    index = faiss.IndexFlatL2(dim)
+    index.add(embeddings)
+    return index
+# Search top-k relevant chunks
+def search_similar_chunks(query, index, chunks, k=3):
+    query_embedding = embedding_model.encode([query])
+    D, I = index.search(np.array(query_embedding), k)
+    return [chunks[i] for i in I[0]]
+# Query LLM via GROQ
+def query_llm(context, question):
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant that answers based on the provided context."},
+        {"role": "user", "content": f"Context:\n{context}\n\nQuestion:\n{question}"}
+    ]
+    response = client.chat.completions.create(
+        messages=messages,
+        model="llama3-8b-8192"
+    )
+    return response.choices[0].message.content
+# --- Streamlit Interface ---
+st.title("📄 RAG App (PDF → Context → Answer via GROQ)")
+st.markdown("Upload a PDF document and ask questions about its content.")
+uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
+if uploaded_file:
+    text = extract_text(uploaded_file)
+    chunks = chunk_text(text)
+    embeddings = embed_chunks(chunks)
+    index = create_faiss_index(np.array(embeddings))
+    question = st.text_input("Ask a question about the document:")
+    if question:
+        top_chunks = search_similar_chunks(question, index, chunks)
+        context = "\n".join(top_chunks)
+        answer = query_llm(context, question)
+        st.markdown("### ✅ Answer:")
+        st.write(answer)