Spaces:

Muqadas-13
/

Smart_RAG_Document_QA_Assistant

Build error

App Files Files Community

Muqadas-13 committed on Jul 16, 2025

Commit

d2ab873

verified ·

1 Parent(s): af7bae4

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -2

app.py CHANGED Viewed

@@ -7,14 +7,17 @@ import numpy as np
 from sentence_transformers import SentenceTransformer
 from groq import Groq
-# ✅ Get Groq API key
 client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 embed_model = SentenceTransformer("all-MiniLM-L6-v2")
 INDEX = faiss.IndexFlatL2(384)
 stored_chunks = []
-# UI Styling
 st.markdown("""
     <style>
     .main-title {
@@ -39,6 +42,7 @@ st.markdown("""
 st.markdown('<div class="main-title">📄 Smart RAG Document QA Assistant</div>', unsafe_allow_html=True)
 def extract_text(file):
     if file.type == "application/pdf":
         reader = PdfReader(file)
@@ -50,20 +54,24 @@ def extract_text(file):
         return file.read().decode("utf-8")
     return ""
 def chunk_text(text, chunk_size=200):
     """Split *text* into consecutive chunks of at most *chunk_size* words.

     Words are whatever ``str.split()`` yields (whitespace-delimited), and
     each chunk re-joins its words with single spaces. The final chunk may
     be shorter than *chunk_size*; empty text yields an empty list.
     """
     tokens = text.split()
     pieces = []
     for start in range(0, len(tokens), chunk_size):
         window = tokens[start:start + chunk_size]
         pieces.append(" ".join(window))
     return pieces
 def store_embeddings(chunks):
     """Embed *chunks* and append them to the FAISS index and chunk store.

     Each chunk is encoded with ``embed_model``, the resulting vectors are
     added to ``INDEX`` as float32, and the chunk texts are appended to
     ``stored_chunks`` in the same order so that an index row number maps
     back to its text.

     An empty *chunks* (for example, a document from which no text could be
     extracted) is a no-op: an empty batch cannot form the 2-D
     ``(n, dim)`` matrix that ``INDEX.add`` expects.
     """
     # Materialise once so the same items are encoded and stored even when
     # the caller passes a one-shot iterable.
     chunks = list(chunks)
     if not chunks:
         return
     vectors = np.asarray(embed_model.encode(chunks), dtype=np.float32)
     INDEX.add(vectors)
     stored_chunks.extend(chunks)
 def retrieve_similar_chunks(query, top_k=3):
     """Return up to *top_k* stored chunks nearest to *query* in the index.

     The query is encoded with ``embed_model`` and searched against
     ``INDEX``; the returned row numbers are mapped back to texts through
     ``stored_chunks``.

     Returns an empty list when *top_k* is not positive or nothing has been
     indexed yet. Fewer than *top_k* chunks are returned when the index
     holds fewer vectors than requested.
     """
     if top_k <= 0 or not stored_chunks:
         return []
     query_vector = np.asarray(embed_model.encode([query]), dtype=np.float32)
     # Never ask for more neighbours than exist: FAISS pads missing results
     # with label -1, which would otherwise index stored_chunks from the end.
     k = min(top_k, len(stored_chunks))
     _, indices = INDEX.search(query_vector, k)
     # Defensive filter in case the index and the chunk list are out of step.
     return [stored_chunks[i] for i in indices[0] if 0 <= i < len(stored_chunks)]
 def get_llm_answer(query, context):
     prompt = f"Answer the question based on the following context:\n\n{context}\n\nQuestion: {query}"
     chat_completion = client.chat.completions.create(
@@ -72,9 +80,11 @@ def get_llm_answer(query, context):
     )
     return chat_completion.choices[0].message.content
 uploaded_file = st.file_uploader("📁 Upload your document", type=["pdf", "docx", "txt"])
 query = st.text_input("💬 Ask a question about your document")
 if uploaded_file:
     with st.spinner("Processing file..."):
         text = extract_text(uploaded_file)
@@ -82,10 +92,12 @@ if uploaded_file:
         store_embeddings(chunks)
     st.success("✅ Document uploaded and indexed!")
 if st.button("🧠 Get Answer") and query:
     with st.spinner("Thinking..."):
         context = "\n\n".join(retrieve_similar_chunks(query))
         answer = get_llm_answer(query, context)
         st.markdown(f'<div class="card"><b>Answer:</b><br>{answer}</div>', unsafe_allow_html=True)
 st.markdown("<br><center style='color: grey;'>Built by Muqadas with ❤️ using Streamlit + Groq + FAISS</center>", unsafe_allow_html=True)

 from sentence_transformers import SentenceTransformer
 from groq import Groq
+# ✅ Initialize Groq client with API key
 client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+# ✅ Load embedding model
 embed_model = SentenceTransformer("all-MiniLM-L6-v2")
+# ✅ Initialize FAISS index
 INDEX = faiss.IndexFlatL2(384)
 stored_chunks = []
+# ✅ UI Styling
 st.markdown("""
     <style>
     .main-title {
 st.markdown('<div class="main-title">📄 Smart RAG Document QA Assistant</div>', unsafe_allow_html=True)
+# ✅ Extract text from various document types
 def extract_text(file):
     if file.type == "application/pdf":
         reader = PdfReader(file)
         return file.read().decode("utf-8")
     return ""
+# ✅ Split text into chunks
 def chunk_text(text, chunk_size=200):
     """Split *text* into consecutive chunks of at most *chunk_size* words.

     Words are whatever ``str.split()`` yields (whitespace-delimited), and
     each chunk re-joins its words with single spaces. The final chunk may
     be shorter than *chunk_size*; empty text yields an empty list.
     """
     tokens = text.split()
     pieces = []
     for start in range(0, len(tokens), chunk_size):
         window = tokens[start:start + chunk_size]
         pieces.append(" ".join(window))
     return pieces
+# ✅ Store vector embeddings in FAISS
 def store_embeddings(chunks):
     """Embed *chunks* and append them to the FAISS index and chunk store.

     Each chunk is encoded with ``embed_model``, the resulting vectors are
     added to ``INDEX`` as float32, and the chunk texts are appended to
     ``stored_chunks`` in the same order so that an index row number maps
     back to its text.

     An empty *chunks* (for example, a document from which no text could be
     extracted) is a no-op: an empty batch cannot form the 2-D
     ``(n, dim)`` matrix that ``INDEX.add`` expects.
     """
     # Materialise once so the same items are encoded and stored even when
     # the caller passes a one-shot iterable.
     chunks = list(chunks)
     if not chunks:
         return
     vectors = np.asarray(embed_model.encode(chunks), dtype=np.float32)
     INDEX.add(vectors)
     stored_chunks.extend(chunks)
+# ✅ Retrieve similar chunks from FAISS
 def retrieve_similar_chunks(query, top_k=3):
     """Return up to *top_k* stored chunks nearest to *query* in the index.

     The query is encoded with ``embed_model`` and searched against
     ``INDEX``; the returned row numbers are mapped back to texts through
     ``stored_chunks``.

     Returns an empty list when *top_k* is not positive or nothing has been
     indexed yet. Fewer than *top_k* chunks are returned when the index
     holds fewer vectors than requested.
     """
     if top_k <= 0 or not stored_chunks:
         return []
     query_vector = np.asarray(embed_model.encode([query]), dtype=np.float32)
     # Never ask for more neighbours than exist: FAISS pads missing results
     # with label -1, which would otherwise index stored_chunks from the end.
     k = min(top_k, len(stored_chunks))
     _, indices = INDEX.search(query_vector, k)
     # Defensive filter in case the index and the chunk list are out of step.
     return [stored_chunks[i] for i in indices[0] if 0 <= i < len(stored_chunks)]
+# ✅ Ask Groq LLaMA3 using context
 def get_llm_answer(query, context):
     prompt = f"Answer the question based on the following context:\n\n{context}\n\nQuestion: {query}"
     chat_completion = client.chat.completions.create(
     )
     return chat_completion.choices[0].message.content
+# ✅ Streamlit UI components
 uploaded_file = st.file_uploader("📁 Upload your document", type=["pdf", "docx", "txt"])
 query = st.text_input("💬 Ask a question about your document")
+# ✅ Process document
 if uploaded_file:
     with st.spinner("Processing file..."):
         text = extract_text(uploaded_file)
         store_embeddings(chunks)
     st.success("✅ Document uploaded and indexed!")
+# ✅ Ask question and get answer
 if st.button("🧠 Get Answer") and query:
     with st.spinner("Thinking..."):
         context = "\n\n".join(retrieve_similar_chunks(query))
         answer = get_llm_answer(query, context)
         st.markdown(f'<div class="card"><b>Answer:</b><br>{answer}</div>', unsafe_allow_html=True)
+# ✅ Footer
 st.markdown("<br><center style='color: grey;'>Built by Muqadas with ❤️ using Streamlit + Groq + FAISS</center>", unsafe_allow_html=True)