chburhan64 committed on
Commit
8db5210
·
verified ·
1 Parent(s): b2468ac

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import time
4
+ import tempfile
5
+ from dotenv import load_dotenv
6
+
7
+ from langchain_groq import ChatGroq
8
+ from langchain_community.vectorstores import FAISS
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain.chains.combine_documents import create_stuff_documents_chain
11
+ from langchain.chains import create_retrieval_chain
12
+ from langchain_core.prompts import ChatPromptTemplate
13
+ from langchain.docstore.document import Document
14
+
15
+ from sentence_transformers import SentenceTransformer
16
+ import numpy as np
17
+ import faiss
18
+ import PyPDF2
19
+
20
+ # Load environment variables
21
+ load_dotenv()
22
+ groq_api_key = os.getenv("GROQ_API_KEY")
23
+
24
+ st.set_page_config(page_title="Document Q&A with Llama3")
25
+ st.title("πŸ“„ Document Q&A with Llama3 (via Groq)")
26
+
27
+ # Load the LLM
28
+ llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")
29
+
30
+ # Prompt template
31
+ prompt = ChatPromptTemplate.from_template("""
32
+ Answer the question based only on the provided context.
33
+
34
+ <context>
35
+ {context}
36
+ </context>
37
+
38
+ Question: {input}
39
+ """)
40
+
41
+ # Load sentence-transformers model
42
+ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
43
+
44
+ # Function to extract and split text from uploaded PDFs
45
+ def process_pdfs(uploaded_files):
46
+ docs = []
47
+ for file in uploaded_files:
48
+ reader = PyPDF2.PdfReader(file)
49
+ text = ""
50
+ for page in reader.pages:
51
+ text += page.extract_text() or ""
52
+ docs.append(Document(page_content=text, metadata={"source": file.name}))
53
+
54
+ # Split into chunks
55
+ splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
56
+ split_docs = splitter.split_documents(docs)
57
+ return split_docs
58
+
59
+ # Create FAISS index
60
+ def create_vector_store(documents):
61
+ texts = [doc.page_content for doc in documents]
62
+ embeddings = embedding_model.encode(texts)
63
+ index = faiss.IndexFlatL2(embeddings.shape[1])
64
+ index.add(np.array(embeddings))
65
+ vectorstore = FAISS(embedding_function=lambda x: embedding_model.encode([x])[0],
66
+ index=index,
67
+ documents=documents)
68
+ return vectorstore
69
+
70
+ # File uploader
71
+ uploaded_files = st.file_uploader("πŸ“ Upload one or more PDF files", type=["pdf"], accept_multiple_files=True)
72
+
73
+ # Button to process documents
74
+ if uploaded_files and st.button("πŸ“š Process Documents"):
75
+ with st.spinner("Processing PDFs and creating vector store..."):
76
+ documents = process_pdfs(uploaded_files)
77
+ st.session_state.vectors = create_vector_store(documents)
78
+ st.success("βœ… Documents processed and vector store created!")
79
+
80
+ # Question input
81
+ query = st.text_input("πŸ’¬ Ask a question about the uploaded documents")
82
+
83
+ # Answering
84
+ if query and "vectors" in st.session_state:
85
+ document_chain = create_stuff_documents_chain(llm, prompt)
86
+ retriever = st.session_state.vectors.as_retriever()
87
+ retrieval_chain = create_retrieval_chain(retriever, document_chain)
88
+
89
+ with st.spinner("Generating answer..."):
90
+ start = time.process_time()
91
+ response = retrieval_chain.invoke({'input': query})
92
+ end = time.process_time()
93
+
94
+ st.markdown("### βœ… Answer:")
95
+ st.write(response['answer'])
96
+
97
+ st.markdown(f"⏱️ Response time: {end - start:.2f} seconds")
98
+
99
+ with st.expander("πŸ” Document Chunks Used"):
100
+ for i, doc in enumerate(response.get("context", [])):
101
+ st.write(doc.page_content)
102
+ st.write("---")
103
+
104
+ elif query and "vectors" not in st.session_state:
105
+ st.warning("⚠️ Please upload and process PDFs first.")