# Page-scrape residue (not code): makhdoomnaeem's picture / "Update app.py" / 6518b55 verified
import os
import streamlit as st
import requests
from PyPDF2 import PdfReader
from langchain_community.vectorstores import FAISS
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from groq import Groq
# Hardcoded Google Drive share link of the PDF this app downloads and indexes.
# download_pdf() extracts the file ID from the path segment that follows "/d/".
GOOGLE_DRIVE_LINK = "https://drive.google.com/file/d/1wv5gbGP0SA15BzoNUxprXhYx0jHhPgHl/view?usp=sharing"
# Function to download the PDF from Google Drive
def download_pdf():
    """Download the PDF referenced by GOOGLE_DRIVE_LINK to ``document.pdf``.

    Returns:
        The local path of the saved file (``"document.pdf"``).

    Raises:
        ValueError: If no file ID can be parsed from GOOGLE_DRIVE_LINK, or
            the downloaded payload does not look like a PDF.
        requests.exceptions.RequestException: On network failure, timeout,
            or a non-2xx HTTP status.
    """
    # Take the path segment right after "/d/" so that links ending in /view,
    # /edit, /preview (or nothing at all) are all accepted.
    _, marker, tail = GOOGLE_DRIVE_LINK.partition("/d/")
    file_id = tail.split("/", 1)[0].split("?", 1)[0]
    if not marker or not file_id:
        raise ValueError(
            f"Could not find a Google Drive file ID in: {GOOGLE_DRIVE_LINK}"
        )

    url = f"https://drive.google.com/uc?id={file_id}&export=download"
    # A timeout keeps the app from hanging forever on a stalled connection.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    # Drive may answer 200 with an HTML page (e.g. permission or confirmation
    # prompts) instead of the file. Refuse to save that as a PDF, otherwise
    # the failure only surfaces later as an obscure parser error.
    if b"%PDF-" not in response.content[:1024]:
        raise ValueError(
            "Google Drive did not return a PDF; check that the file is "
            "shared publicly and can be downloaded directly."
        )

    output_path = "document.pdf"
    with open(output_path, "wb") as f:
        f.write(response.content)
    return output_path
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of *pdf_file* concatenated in page order.

    Args:
        pdf_file: Path or binary file object accepted by ``PdfReader``.

    Returns:
        A single string with the page texts joined back to back (no
        separator is inserted between pages). Empty when no page yields text.
    """
    reader = PdfReader(pdf_file)
    # join() builds the result in one pass instead of repeated `+=`.
    # `or ""` is defensive: a page that yields no text object is treated as
    # empty instead of raising TypeError during concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Function to create FAISS vector database
def create_vector_db(text):
    """Chunk *text*, embed the chunks and index them in a FAISS vector store.

    Args:
        text: The full document text to index.

    Returns:
        The FAISS vector store built from the chunks of *text*.
    """
    # Overlapping chunks (chunk_size=500, chunk_overlap=50) so content that
    # straddles a chunk boundary still appears intact in one of the chunks.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    pieces = splitter.split_text(text)

    # Use Hugging Face Embeddings
    embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return FAISS.from_texts(pieces, embedder)
# Function to query Groq API
def query_groq_api(query, context, model="llama-3.3-70b-versatile"):
    """Ask a Groq chat model to answer *query* using *context*.

    The API key is read from the ``GROQ_API_KEY`` environment variable
    (configure it as a deployment secret); it is never stored in source.

    Args:
        query: The user's question.
        context: Text retrieved from the document to ground the answer.
        model: Groq model identifier to use for the completion.

    Returns:
        The assistant's reply text; ``"No response."`` when the API reply
        carries no content; or a string starting with ``"Error:"`` when the
        key is missing or the request fails.
    """
    # SECURITY: a live API key was previously hardcoded in this function.
    # That key is exposed in the file's history and must be revoked; provide
    # its replacement through the environment only.
    api_key = os.getenv("GROQ_API_KEY", "").strip()
    if not api_key:
        return "Error: GROQ_API_KEY environment variable is not set."

    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are an intelligent assistant."},
            {"role": "user", "content": f"Context: {context}\nQuestion: {query}"},
        ],
    }

    try:
        # Bounded wait so a stalled API call cannot freeze the UI forever.
        response = requests.post(url, headers=headers, json=data, timeout=60)
        response.raise_for_status()  # Raise an error for bad responses
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        return f"Error: {e}"

    # `or` fallbacks also cover present-but-empty values (e.g. "choices": []),
    # which a plain dict.get() default would not.
    choices = result.get("choices") or [{}]
    message = choices[0].get("message") or {}
    return message.get("content") or "No response."
# Streamlit App
# Top-level script: Streamlit re-runs it from the top on every interaction,
# so anything that must survive a rerun is kept in st.session_state.
st.title("PDF Book Querry and Response")

# Persistent state to store vector database
if "vector_db" not in st.session_state:
    st.session_state.vector_db = None

# Process the hardcoded PDF link
if st.button("Process PDF"):
    st.info("Downloading and processing the PDF...")
    pdf_file = download_pdf()
    pdf_text = extract_text_from_pdf(pdf_file)

    if not pdf_text.strip():
        # Nothing to index (e.g. a scanned, image-only PDF): stop here with
        # a clear message instead of building a vector store from no text.
        st.error("No text could be extracted from the PDF.")
    else:
        st.success("PDF processed successfully!")

        # Create FAISS vector database
        st.info("Creating vector database...")
        st.session_state.vector_db = create_vector_db(pdf_text)
        st.success("Vector database created!")

# Query the document (only once a vector database exists)
if st.session_state.vector_db is not None:
    user_query = st.text_input("Ask a question about the document:")
    if st.button("Submit Query"):
        if not user_query.strip():
            # Avoid a pointless similarity search and LLM call on blank input.
            st.warning("Please enter a question first.")
        else:
            with st.spinner("Processing your query..."):
                # Retrieve similar text chunks
                similar_docs = st.session_state.vector_db.similarity_search(
                    user_query, k=3
                )
                context = " ".join(doc.page_content for doc in similar_docs)

                # Send query with context to Groq API
                response = query_groq_api(user_query, context)
            st.write("**Answer:**", response)