Spaces:

Mehak900
/

docs_RAG_app

Sleeping

App Files Files Community

Mehak900 committed on Aug 7, 2025

Commit

fad3f30

verified ·

1 Parent(s): 5d66b63

Create app.py

Browse files

Files changed (1) hide show

app.py +73 -0

app.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import streamlit as st
+import os
+from groq import Groq
+from PyPDF2 import PdfReader
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.docstore.document import Document
+# Set your Groq API key directly (recommended for Hugging Face Spaces)
+GROQ_API_KEY = "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"  # 🔁 Replace this with your actual API key
+# Initialize Groq client
+groq_client = Groq(api_key=GROQ_API_KEY)
+# Load embedding model
+embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+# Function to extract text from PDF
+def extract_text_from_pdf(uploaded_file):
+    reader = PdfReader(uploaded_file)
+    text = ""
+    for page in reader.pages:
+        page_text = page.extract_text()
+        if page_text:
+            text += page_text
+    return text
+# Function to split text into chunks
+def chunk_text(text):
+    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+    chunks = splitter.split_text(text)
+    return [Document(page_content=chunk) for chunk in chunks]
+# Create FAISS vector index
+def create_faiss_index(documents):
+    return FAISS.from_documents(documents, embedding_model)
+# Search similar chunks
+def search_faiss_index(query, index, k=3):
+    return index.similarity_search(query, k=k)
+# Generate answer using Groq model
+def generate_answer(query, context_chunks):
+    context = "\n".join([doc.page_content for doc in context_chunks])
+    prompt = f"""Answer the following question based on the context:\n\n{context}\n\nQuestion: {query}"""
+    response = groq_client.chat.completions.create(
+        messages=[{"role": "user", "content": prompt}],
+        model="llama-3.1-8b-instant"  # ✅ Correct current model name on Groq
+    )
+    return response.choices[0].message.content
+# Streamlit UI
+st.title("📄 RAG-based PDF QA App (Groq + FAISS)")
+uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
+if uploaded_file:
+    with st.spinner("Reading and processing document..."):
+        raw_text = extract_text_from_pdf(uploaded_file)
+        documents = chunk_text(raw_text)
+        vector_index = create_faiss_index(documents)
+        st.success("Document processed and indexed successfully!")
+    question = st.text_input("Ask a question based on the uploaded document:")
+    if question:
+        with st.spinner("Searching and generating answer..."):
+            related_chunks = search_faiss_index(question, vector_index)
+            answer = generate_answer(question, related_chunks)
+            st.subheader("📌 Answer:")
+            st.write(answer)