Spaces:

shamilcoded
/

DocuQuery_AI

Sleeping

App Files Community

shamilcoded committed on Apr 18, 2025

Commit

70d0eba

verified ·

1 Parent(s): bd0f639

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -23

app.py CHANGED Viewed

@@ -1,28 +1,28 @@
 import streamlit as st
 import os
 import tempfile
-import faiss
 import fitz  # PyMuPDF for PDFs
 import docx
 import openpyxl
-from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.docstore.document import Document
-from langchain_community.llms import Groq
 from langchain.chains import RetrievalQA
-from langchain.schema import Document as LCDocument
-# Initialize LLM
 llm = Groq(
     model="llama3-8b-8192",
-    api_key=os.getenv("GROQ_API_KEY")  # Put this in Hugging Face secrets
 )
 # Embeddings model
 embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-# File processors
 def read_pdf(file_path):
     text = ""
     doc = fitz.open(file_path)
@@ -51,37 +51,37 @@ def process_file(uploaded_file):
     if suffix.lower() == "pdf":
         return read_pdf(tmp_path)
-    elif suffix.lower() in ["docx"]:
         return read_docx(tmp_path)
-    elif suffix.lower() in ["xlsx"]:
         return read_excel(tmp_path)
     else:
         return "Unsupported file type."
-# Streamlit UI
-st.title("📄 RAG Document QA with Faiss + LLaMA3")
-uploaded_file = st.file_uploader("Upload a PDF, Word or Excel file", type=["pdf", "docx", "xlsx"])
 if uploaded_file:
     st.success("✅ File uploaded successfully.")
-    raw_text = process_file(uploaded_file)
-    # Split text into chunks
     splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
-    texts = splitter.split_text(raw_text)
-    docs = [Document(page_content=t) for t in texts]
-    # Embed and create vector store
-    with st.spinner("Indexing document..."):
         db = FAISS.from_documents(docs, embedding_model)
         retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})
-        qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
-    st.success("✅ Document indexed! Ask your questions below:")
-    user_query = st.text_input("❓ Ask a question about your document")
     if user_query:
         with st.spinner("Generating answer..."):
-            answer = qa.run(user_query)
-            st.markdown(f"**💬 Answer:** {answer}")

 import streamlit as st
 import os
 import tempfile
 import fitz  # PyMuPDF for PDFs
 import docx
 import openpyxl
+import faiss
+from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.docstore.document import Document
+from langchain.llms import Groq
 from langchain.chains import RetrievalQA
+# Load LLM (API key from Hugging Face secrets)
 llm = Groq(
     model="llama3-8b-8192",
+    api_key=os.getenv("GROQ_API_KEY")
 )
 # Embeddings model
 embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+# File readers
 def read_pdf(file_path):
     text = ""
     doc = fitz.open(file_path)
     if suffix.lower() == "pdf":
         return read_pdf(tmp_path)
+    elif suffix.lower() == "docx":
         return read_docx(tmp_path)
+    elif suffix.lower() == "xlsx":
         return read_excel(tmp_path)
     else:
         return "Unsupported file type."
+# Streamlit App
+st.set_page_config(page_title="DocuQuery AI", layout="centered")
+st.title("📄 DocuQuery AI")
+st.markdown("Upload a document (PDF, Word, or Excel) and ask questions about its content using LLaMA3.")
+uploaded_file = st.file_uploader("Upload your document", type=["pdf", "docx", "xlsx"])
 if uploaded_file:
     st.success("✅ File uploaded successfully.")
+    with st.spinner("Reading and processing file..."):
+        raw_text = process_file(uploaded_file)
     splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+    docs = [Document(page_content=chunk) for chunk in splitter.split_text(raw_text)]
+    with st.spinner("Indexing document with FAISS..."):
         db = FAISS.from_documents(docs, embedding_model)
         retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})
+        qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
+    st.success("📚 Document indexed. Ask your question below!")
+    user_query = st.text_input("❓ Ask something about the document:")
     if user_query:
         with st.spinner("Generating answer..."):
+            response = qa_chain.run(user_query)
+            st.markdown(f"**💬 Answer:** {response}")