Zubair67 committed on
Commit
2d86f45
·
verified ·
1 Parent(s): c97c196

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -43
app.py CHANGED
@@ -3,9 +3,11 @@ import streamlit as st
3
  from groq import Groq
4
  from langchain.vectorstores import FAISS
5
  from langchain.text_splitter import CharacterTextSplitter
6
- from langchain.embeddings import OpenAIEmbeddings
7
  from langchain.chains import RetrievalQA
8
  from langchain.prompts import PromptTemplate
 
 
9
 
10
  # Initialize Groq client
11
  client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
@@ -13,45 +15,48 @@ client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
13
  # Title of the application
14
  st.title("Public Procurement Rules Assistant")
15
 
16
- # Upload PDF
17
- uploaded_file = st.file_uploader("Upload the PPRA Rules 2004 PDF", type=["pdf"])
18
-
19
- if uploaded_file:
20
- from PyPDF2 import PdfReader
21
-
22
- # Read and extract text from PDF
23
- pdf_reader = PdfReader(uploaded_file)
24
- text = ""
25
- for page in pdf_reader.pages:
26
- text += page.extract_text()
27
-
28
- # Split text into chunks for embedding
29
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
30
- chunks = text_splitter.split_text(text)
31
-
32
- # Create embeddings and FAISS index
33
- embeddings = OpenAIEmbeddings()
34
- vectorstore = FAISS.from_texts(chunks, embeddings)
35
-
36
- # Set up retrieval-based QA
37
- retriever = vectorstore.as_retriever()
38
- qa_chain = RetrievalQA.from_chain_type(
39
- llm=lambda query: client.chat.completions.create(
40
- messages=[{"role": "user", "content": query}],
41
- model="llama-3.3-70b-versatile",
42
- ).choices[0].message.content,
43
- retriever=retriever,
44
- return_source_documents=True,
45
- )
46
-
47
- # Input for user query
48
- user_query = st.text_input("Ask a question about PPRA Rules 2004:")
49
- if user_query:
50
- response = qa_chain.run(user_query)
51
- st.subheader("Answer:")
52
- st.write(response)
53
-
54
- # Optional: Display relevant source documents
55
- st.subheader("Relevant Sources:")
56
- for doc in response["source_documents"]:
57
- st.write(doc.page_content)
 
 
 
 
from groq import Groq
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA  # kept for compatibility; chain built manually below
from langchain.prompts import PromptTemplate
from PyPDF2 import PdfReader
import requests

# Initialize Groq client (GROQ_API_KEY must be set in the environment;
# Groq() raises later if the key is missing/invalid).
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Title of the application
st.title("Public Procurement Rules Assistant")

# Source document: PPRA Rules 2004, hosted on Google Drive.
file_url = "https://drive.google.com/uc?export=download&id=1faNpSV_UIZzd3h08qtzvSRGmzDkNtmuA"
pdf_path = "PPRA_Rules_2004.pdf"

if not os.path.exists(pdf_path):  # Download the file only if not already downloaded
    response = requests.get(file_url, timeout=60)
    # Fail loudly on a bad download instead of silently indexing an error page.
    response.raise_for_status()
    with open(pdf_path, "wb") as f:
        f.write(response.content)


@st.cache_resource  # build the index once, not on every Streamlit rerun
def _build_retriever(path: str):
    """Extract text from the PDF at *path*, chunk it, embed it, and return a FAISS retriever."""
    pdf_reader = PdfReader(path)
    # extract_text() can return None for image-only pages; guard with `or ""`.
    text = "".join((page.extract_text() or "") for page in pdf_reader.pages)

    # Split text into chunks for embedding
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = text_splitter.split_text(text)

    # Use HuggingFace embeddings (run locally; no OpenAI key needed alongside Groq).
    embeddings = HuggingFaceEmbeddings()
    vectorstore = FAISS.from_texts(chunks, embeddings)
    return vectorstore.as_retriever()


retriever = _build_retriever(pdf_path)

# Input for user query
user_query = st.text_input("Ask a question about PPRA Rules 2004:")
if user_query:
    # RetrievalQA.from_chain_type requires a LangChain LLM object — a bare
    # lambda is rejected at chain construction — so run the retrieve-then-
    # generate steps explicitly against the Groq client instead.
    source_docs = retriever.get_relevant_documents(user_query)
    context = "\n\n".join(doc.page_content for doc in source_docs)

    completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": (
                    "Answer the question using only the context below.\n\n"
                    f"Context:\n{context}\n\n"
                    f"Question: {user_query}"
                ),
            }
        ],
        model="llama-3.3-70b-versatile",
    )

    st.subheader("Answer:")
    st.write(completion.choices[0].message.content)

    # Optional: Display relevant source documents
    st.subheader("Relevant Sources:")
    for doc in source_docs:
        st.write(doc.page_content)