usmanayaz committed on
Commit
b33709f
·
verified ·
1 Parent(s): 1a0c0d0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from PyPDF2 import PdfReader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.embeddings import HuggingFaceEmbeddings
6
+ from langchain.vectorstores import FAISS
7
+ from groq import Groq
8
+ import requests
9
+
10
# Helper function to download and load the PDF from Google Drive
def load_pdf_from_drive(output_path="downloaded_document.pdf"):
    """Download the predefined Google Drive PDF to *output_path*.

    Args:
        output_path: Local filename to write the downloaded bytes to.

    Returns:
        The path the PDF was written to (same as *output_path*).

    Raises:
        requests.HTTPError: If Google Drive responds with an error status.
        requests.RequestException: On network failure or timeout.
    """
    drive_link = "https://drive.google.com/file/d/1SzVEuEdKi4dHeKgDrUbmoq1MShB-hyG4/view?usp=drive_link"
    # The shareable link embeds the file id between "/d/" and the next "/".
    file_id = drive_link.split("/d/")[1].split("/")[0]
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    # Timeout so a hung connection cannot block the app forever;
    # raise_for_status so an HTML error page is not silently saved as a "PDF".
    response = requests.get(download_url, timeout=60)
    response.raise_for_status()
    with open(output_path, "wb") as f:
        f.write(response.content)
    return output_path
19
+
20
# Helper function to parse the PDF
def load_pdf_content(pdf_path):
    """Extract and concatenate the text of every page in the PDF.

    Args:
        pdf_path: Path to a PDF file readable by PyPDF2.

    Returns:
        The concatenated text of all pages (empty string for pages with
        no extractable text).
    """
    reader = PdfReader(pdf_path)
    # extract_text() may return None for image-only/empty pages in PyPDF2;
    # coalesce to "" so concatenation cannot raise TypeError.
    # join() also avoids quadratic string concatenation on large documents.
    return "".join(page.extract_text() or "" for page in reader.pages)
27
+
28
# --- Streamlit RAG application ------------------------------------------------
# Downloads a fixed PDF, chunks it, embeds the chunks into a FAISS index,
# then answers user questions with the Groq chat API using retrieved context.
st.title("RAG-Based Application with Groq API")
st.write("Processing a predefined PDF document from Google Drive to create a vector database and interact with it.")

st.write("Downloading and processing the document...")

# Download and load content from the PDF
pdf_path = load_pdf_from_drive()
document_text = load_pdf_content(pdf_path)

# Split the text into manageable chunks (overlap preserves context across
# chunk boundaries so retrieval does not cut sentences mid-thought).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200
)
text_chunks = text_splitter.split_text(document_text)

st.write(f"Document split into {len(text_chunks)} chunks.")

# Initialize embedding function
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create FAISS vector database
faiss_index = FAISS.from_texts(text_chunks, embedding=embedding_function)

st.write("Vector database created successfully.")

# Save the FAISS index
faiss_index.save_local("faiss_index")

# SECURITY FIX: the Groq API key was hard-coded in source (and therefore
# leaked in version control). Read it from the environment instead; the
# old key should be revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY environment variable is not set.")
    st.stop()
client = Groq(api_key=GROQ_API_KEY)

# Chat interaction setup
st.write("Ask a question related to the document:")
user_query = st.text_input("Your question:")

if user_query:
    # BUG FIX: the original never consulted the FAISS index, so the model
    # answered from its own knowledge rather than the document. Retrieve
    # the most relevant chunks and supply them as grounding context.
    relevant_docs = faiss_index.similarity_search(user_query, k=3)
    context = "\n\n".join(doc.page_content for doc in relevant_docs)
    query_response = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": (
                    "Answer the user's question using only the following "
                    "document excerpts:\n\n" + context
                ),
            },
            {"role": "user", "content": user_query},
        ],
        model="llama-3.3-70b-versatile",
    )
    st.write("Response:")
    st.write(query_response.choices[0].message.content)