Anas12-345 committed on
Commit
9a2dfaa
·
verified ·
1 Parent(s): 4fc2a80

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import PyPDF2
3
+ import faiss
4
+ import streamlit as st
5
+ from sentence_transformers import SentenceTransformer
6
+ from groq import Groq
7
+
8
# Set up the Groq client.
# The API key is read from the GROQ_API_KEY environment variable so that no
# secret lives in source control. NOTE: any key that was previously committed
# in this file must be treated as leaked and rotated.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Initialize the embedding model and the FAISS index.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# The index dimension must equal the embedding size of the model above.
index = faiss.IndexFlatL2(384)
14
+
15
+ # PDF text extraction
16
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts as its source
            (here, the object returned by the Streamlit file uploader).

    Returns:
        The text of all pages joined in page order with no separator.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # Join once instead of growing a string with ``+=``; a page that yields
    # no text (empty or ``None``) contributes an empty string.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
22
+
23
+ # Text chunking
24
def chunk_text(text, chunk_size=500):
    """Split *text* into consecutive, non-overlapping chunks.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.

    Returns:
        A list of substrings covering *text* in order. Every chunk has
        ``chunk_size`` characters except possibly the last, which may be
        shorter. An empty *text* yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    # A negative step would make ``range`` empty and silently drop all of the
    # text; a zero step raises an unhelpful error. Reject both up front.
    if chunk_size <= 0:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )
    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), chunk_size)
    ]
26
+
27
+ # Embed and store in FAISS
28
def embed_and_store(chunks):
    """Encode text chunks and add the resulting vectors to the FAISS index.

    Args:
        chunks: The pieces of text to embed.

    Returns:
        The embeddings produced by the module-level ``embedding_model``.
    """
    vectors = embedding_model.encode(chunks)
    # Side effect: the module-level index grows by one entry per chunk.
    index.add(vectors)
    return vectors
32
+
33
+ # Retrieve relevant chunks
34
def retrieve_chunks(query, top_k=5):
    """Look up the stored chunks closest to *query* in the FAISS index.

    Args:
        query: The question text to embed and search for.
        top_k: Maximum number of neighbours to return.

    Returns:
        The label array returned by ``index.search`` for the single query;
        ``result[0]`` holds the positions of the matching chunks in the order
        they were added to the index.
    """
    # FAISS pads the labels with -1 when asked for more neighbours than the
    # index holds, and -1 would later be read as "last element" by Python
    # list indexing. Never request more neighbours than are stored.
    k = top_k
    if 0 < index.ntotal < top_k:
        k = index.ntotal

    query_embedding = embedding_model.encode([query])
    _, indices = index.search(query_embedding, k)
    return indices
38
+
39
+ # Query Groq API
40
def query_groq(prompt):
    """Send *prompt* to the Groq chat API and return the reply text.

    Args:
        prompt: The full text sent as a single user message.

    Returns:
        The message content of the first choice in the completion.
    """
    messages = [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(
        messages=messages,
        model="llama3-8b-8192",
    )
    first_choice = response.choices[0]
    return first_choice.message.content
46
+
47
+ # Streamlit UI
48
def main():
    """Render the Streamlit UI: upload a PDF, index it, and answer questions.

    The uploaded PDF is converted to text, split into chunks, embedded and
    stored in the FAISS index. When the user enters a question, the closest
    chunks are retrieved and sent to Groq together with the question, and the
    answer is displayed.
    """
    st.title("RAG-based PDF QA System")
    st.sidebar.header("Upload and Interact")

    uploaded_file = st.sidebar.file_uploader("Upload a PDF", type=["pdf"])
    if not uploaded_file:
        return

    st.sidebar.success("PDF Uploaded Successfully!")
    text = extract_text_from_pdf(uploaded_file)
    chunks = chunk_text(text)

    # A PDF with no extractable text produces no chunks; embedding and
    # indexing an empty list would fail, so stop with a clear message.
    if not chunks:
        st.warning("No extractable text was found in the uploaded PDF.")
        return

    embed_and_store(chunks)
    st.write("PDF content has been processed and stored.")

    query = st.text_input("Enter your question:")
    if not query:
        return

    indices = retrieve_chunks(query)
    # FAISS reports missing neighbours as -1; keep only labels that are real
    # positions in ``chunks`` so a negative index never selects the last one.
    relevant_chunks = [
        chunks[i] for i in indices[0] if 0 <= i < len(chunks)
    ]

    prompt = " ".join(relevant_chunks) + f"\n\nQuestion: {query}"
    answer = query_groq(prompt)
    st.write("### Answer:")
    st.write(answer)
71
+
72
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()