makhdoomnaeem commited on
Commit
9a44baf
·
verified ·
1 Parent(s): f809e7b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -0
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ import requests
4
+ from PyPDF2 import PdfReader
5
+ from langchain_community.vectorstores import FAISS
6
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from groq import Groq # Import Groq client library
9
+
10
# Function to initialize Groq client
def initialize_groq_client(api_key):
    """Build a Groq API client from *api_key*.

    Returns the client instance on success; on any failure the error is
    shown in the Streamlit UI and None is returned instead of raising.
    """
    client = None
    try:
        client = Groq(api_key=api_key)
    except Exception as exc:  # surface the problem in the UI rather than crash the app
        st.error(f"Failed to initialize Groq client: {exc}")
    return client
17
+
18
# Function to download the PDF from Google Drive
def download_pdf(drive_link):
    """Download a PDF shared via a Google Drive "file/d/<id>/view" link.

    Parameters
    ----------
    drive_link : str
        A sharing link of the form https://drive.google.com/file/d/<id>/view...

    Returns
    -------
    str
        Local path of the saved file ("document.pdf").

    Raises
    ------
    ValueError
        If the link does not contain a "/d/<id>" segment.
    requests.HTTPError
        If the download request returns an error status.
    """
    try:
        file_id = drive_link.split("/d/")[1].split("/view")[0]
    except IndexError:
        # Previously a malformed link crashed with a bare IndexError.
        raise ValueError(f"Not a recognizable Google Drive file link: {drive_link}")
    url = f"https://drive.google.com/uc?id={file_id}&export=download"
    # Fail fast on network/HTTP errors instead of silently writing Google's
    # HTML error page to disk as "document.pdf"; bound the wait with a timeout.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    with open("document.pdf", "wb") as f:
        f.write(response.content)
    return "document.pdf"
26
+
27
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Concatenate the text of every page of *pdf_file*.

    Parameters
    ----------
    pdf_file : str or file-like
        Path (or open binary stream) of the PDF to read.

    Returns
    -------
    str
        All extracted page text joined together; pages with no extractable
        text contribute nothing.
    """
    reader = PdfReader(pdf_file)
    # BUG FIX: PdfReader's extract_text() can return None for pages with no
    # text layer (e.g. scanned images); the original `text += ...` then raised
    # TypeError. `or ""` treats those pages as empty. str.join also avoids
    # quadratic string concatenation on large documents.
    return "".join(page.extract_text() or "" for page in reader.pages)
34
+
35
# Function to create FAISS vector database
def create_vector_db(text):
    """Split *text* into overlapping chunks and index them in a FAISS store.

    Chunks are 500 characters with a 50-character overlap, embedded with the
    all-MiniLM-L6-v2 Hugging Face sentence-transformer. Returns the populated
    FAISS vector store.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    pieces = splitter.split_text(text)

    # Use Hugging Face Embeddings for each chunk.
    embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return FAISS.from_texts(pieces, embedder)
45
+
46
# Function to query Groq API using Groq client
def query_groq_api(client, query, context, model="llama-3.3-70b-versatile"):
    """Ask the Groq chat-completion endpoint *query*, grounded in *context*.

    Returns the assistant's reply text; any failure is returned as an
    "Error querying Groq API: ..." string so the UI can display it.
    """
    messages = [
        {"role": "system", "content": "You are an intelligent assistant."},
        {"role": "user", "content": f"Context: {context}\nQuestion: {query}"},
    ]
    try:
        completion = client.chat.completions.create(
            messages=messages,
            model=model,
            stream=False,
        )
        return completion.choices[0].message.content
    except Exception as exc:  # report failures as text instead of raising
        return f"Error querying Groq API: {exc}"
60
+
61
# ---------------------------------------------------------------------------
# Streamlit app: download a Google-Drive PDF, index it, and answer questions
# about it via the Groq API.
# ---------------------------------------------------------------------------
st.title("PDF Q&A with Groq API")

# Dynamic API Key Management: a typed-in key wins; otherwise the env var is used.
default_api_key = os.getenv("GROQ_API_KEY", "")  # Check for API key in environment variable
api_key = st.text_input(
    "Enter Groq API Key (leave blank to use environment variable):",
    value=default_api_key,
    type="password",
    help="Provide your Groq API key. If left blank, the app will use the key from the environment variable."
)

# BUG FIX: groq_client must be bound even when no key is supplied — the query
# section below reads `groq_client`, which previously raised NameError when
# the text input was empty.
groq_client = None
if api_key:
    groq_client = initialize_groq_client(api_key)
    if groq_client:
        st.success("Groq client initialized successfully!")
    else:
        st.error("Failed to initialize Groq client. Please check the API key.")

# Persistent state to store vector database across Streamlit reruns.
if "vector_db" not in st.session_state:
    st.session_state.vector_db = None

# Upload PDF or use Google Drive link.
pdf_link = st.text_input("Enter Google Drive link to PDF:")
upload_button = st.button("Process PDF")

if pdf_link and upload_button:
    if not api_key or not groq_client:
        st.error("Please provide a valid Groq API Key before proceeding.")
    else:
        st.info("Downloading and processing the PDF...")
        pdf_file = download_pdf(pdf_link)
        pdf_text = extract_text_from_pdf(pdf_file)
        st.success("PDF processed successfully!")

        # Create FAISS vector database from the extracted text.
        st.info("Creating vector database...")
        st.session_state.vector_db = create_vector_db(pdf_text)
        st.success("Vector database created!")

# Query the document once both the index and a working client exist.
if st.session_state.vector_db and groq_client:
    user_query = st.text_input("Ask a question about the document:")
    if st.button("Submit Query"):
        with st.spinner("Processing your query..."):
            # Retrieve the 3 most similar text chunks as grounding context.
            similar_docs = st.session_state.vector_db.similarity_search(user_query, k=3)
            context = " ".join(doc.page_content for doc in similar_docs)

            # Send query with context to Groq API.
            response = query_groq_api(groq_client, user_query, context)
            st.write("**Answer:**", response)