shamilcoded committed on
Commit
70d0eba
·
verified ·
1 Parent(s): bd0f639

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -23
app.py CHANGED
@@ -1,28 +1,28 @@
1
  import streamlit as st
2
  import os
3
  import tempfile
4
- import faiss
5
  import fitz # PyMuPDF for PDFs
6
  import docx
7
  import openpyxl
8
- from langchain_community.embeddings import HuggingFaceEmbeddings
 
 
9
  from langchain.vectorstores import FAISS
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
  from langchain.docstore.document import Document
12
- from langchain_community.llms import Groq
13
  from langchain.chains import RetrievalQA
14
- from langchain.schema import Document as LCDocument
15
 
16
- # Initialize LLM
17
  llm = Groq(
18
  model="llama3-8b-8192",
19
- api_key=os.getenv("GROQ_API_KEY") # Put this in Hugging Face secrets
20
  )
21
 
22
  # Embeddings model
23
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
24
 
25
- # File processors
26
  def read_pdf(file_path):
27
  text = ""
28
  doc = fitz.open(file_path)
@@ -51,37 +51,37 @@ def process_file(uploaded_file):
51
 
52
  if suffix.lower() == "pdf":
53
  return read_pdf(tmp_path)
54
- elif suffix.lower() in ["docx"]:
55
  return read_docx(tmp_path)
56
- elif suffix.lower() in ["xlsx"]:
57
  return read_excel(tmp_path)
58
  else:
59
  return "Unsupported file type."
60
 
61
- # Streamlit UI
62
- st.title("πŸ“„ RAG Document QA with Faiss + LLaMA3")
 
 
63
 
64
- uploaded_file = st.file_uploader("Upload a PDF, Word or Excel file", type=["pdf", "docx", "xlsx"])
65
 
66
  if uploaded_file:
67
  st.success("βœ… File uploaded successfully.")
68
- raw_text = process_file(uploaded_file)
 
69
 
70
- # Split text into chunks
71
  splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
72
- texts = splitter.split_text(raw_text)
73
- docs = [Document(page_content=t) for t in texts]
74
 
75
- # Embed and create vector store
76
- with st.spinner("Indexing document..."):
77
  db = FAISS.from_documents(docs, embedding_model)
78
  retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})
79
- qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
80
 
81
- st.success("βœ… Document indexed! Ask your questions below:")
82
 
83
- user_query = st.text_input("❓ Ask a question about your document")
84
  if user_query:
85
  with st.spinner("Generating answer..."):
86
- answer = qa.run(user_query)
87
- st.markdown(f"**πŸ’¬ Answer:** {answer}")
 
1
  import streamlit as st
2
  import os
3
  import tempfile
 
4
  import fitz # PyMuPDF for PDFs
5
  import docx
6
  import openpyxl
7
+ import faiss
8
+
9
+ from langchain.embeddings import HuggingFaceEmbeddings
10
  from langchain.vectorstores import FAISS
11
  from langchain.text_splitter import RecursiveCharacterTextSplitter
12
  from langchain.docstore.document import Document
13
+ from langchain.llms import Groq
14
  from langchain.chains import RetrievalQA
 
15
 
16
# Groq-hosted LLaMA3 chat model. The API key is read from the environment
# (populated from Hugging Face Space secrets); presumably a missing key fails
# at first request time — TODO confirm against the Groq client's behavior.
_GROQ_MODEL_ID = "llama3-8b-8192"
llm = Groq(model=_GROQ_MODEL_ID, api_key=os.getenv("GROQ_API_KEY"))
21
 
22
# Sentence-transformer encoder used to embed text chunks for the FAISS index.
_EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name=_EMBED_MODEL_NAME)
24
 
25
+ # File readers
26
  def read_pdf(file_path):
27
  text = ""
28
  doc = fitz.open(file_path)
 
51
 
52
  if suffix.lower() == "pdf":
53
  return read_pdf(tmp_path)
54
+ elif suffix.lower() == "docx":
55
  return read_docx(tmp_path)
56
+ elif suffix.lower() == "xlsx":
57
  return read_excel(tmp_path)
58
  else:
59
  return "Unsupported file type."
60
 
61
# ---- Streamlit front-end --------------------------------------------------
# Page chrome and intro copy.
st.set_page_config(page_title="DocuQuery AI", layout="centered")
st.title("πŸ“„ DocuQuery AI")
st.markdown(
    "Upload a document (PDF, Word, or Excel) and ask questions about its "
    "content using LLaMA3."
)

uploaded_file = st.file_uploader("Upload your document", type=["pdf", "docx", "xlsx"])

if uploaded_file:
    st.success("βœ… File uploaded successfully.")

    # Pull the raw text out of whichever format was uploaded.
    with st.spinner("Reading and processing file..."):
        raw_text = process_file(uploaded_file)

    # Chunk the text so each piece fits comfortably in the embedding model.
    chunker = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = [Document(page_content=piece) for piece in chunker.split_text(raw_text)]

    # Embed the chunks, build the FAISS index, and wire up the QA chain.
    # NOTE(review): this re-indexes on every Streamlit rerun (each widget
    # interaction) — caching via st.cache_resource would avoid repeated work.
    with st.spinner("Indexing document with FAISS..."):
        vector_db = FAISS.from_documents(docs, embedding_model)
        retriever = vector_db.as_retriever(
            search_type="similarity", search_kwargs={"k": 4}
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm, chain_type="stuff", retriever=retriever
        )

    st.success("πŸ“š Document indexed. Ask your question below!")

    user_query = st.text_input("❓ Ask something about the document:")
    if user_query:
        with st.spinner("Generating answer..."):
            response = qa_chain.run(user_query)
            st.markdown(f"**πŸ’¬ Answer:** {response}")