Spaces:
Sleeping
Sleeping
| import os | |
| import streamlit as st | |
| import requests | |
| from PyPDF2 import PdfReader | |
| from langchain_community.vectorstores import FAISS | |
| from langchain.embeddings.huggingface import HuggingFaceEmbeddings | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from groq import Groq | |
# Hardcoded Google Drive share link to the source PDF.
# The file id is parsed out of the ".../d/<id>/view" segment by download_pdf().
GOOGLE_DRIVE_LINK = "https://drive.google.com/file/d/1wv5gbGP0SA15BzoNUxprXhYx0jHhPgHl/view?usp=sharing"
# Function to download the PDF from Google Drive
def download_pdf():
    """Download the hardcoded Google Drive PDF to ./document.pdf.

    Returns:
        str: Path of the downloaded file ("document.pdf").

    Raises:
        requests.HTTPError: If Drive answers with an error status.
    """
    # Extract the file id from the share URL (".../d/<id>/view...").
    file_id = GOOGLE_DRIVE_LINK.split("/d/")[1].split("/view")[0]
    url = f"https://drive.google.com/uc?id={file_id}&export=download"
    # Timeout prevents a hung connection from freezing the Streamlit app;
    # stream=True avoids loading the whole PDF into memory at once.
    response = requests.get(url, timeout=60, stream=True)
    # Fail loudly instead of silently writing an HTML error page to disk
    # (the original saved response.content unconditionally).
    response.raise_for_status()
    with open("document.pdf", "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)
    return "document.pdf"
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page in a PDF.

    Args:
        pdf_file: Path or binary file-like object accepted by PyPDF2.PdfReader.

    Returns:
        str: Concatenated text of all pages (empty string for image-only pages).
    """
    reader = PdfReader(pdf_file)
    # extract_text() returns None for pages with no extractable text
    # (e.g. scanned images) — coalesce to "" so concatenation never raises.
    # "".join avoids the quadratic cost of += on large documents.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Function to create FAISS vector database
def create_vector_db(text):
    """Chunk *text* and index the chunks in an in-memory FAISS store.

    Args:
        text: Full document text to index.

    Returns:
        FAISS: Vector store over 500-character chunks (50-char overlap).
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_text(text)
    # Embed each chunk with a small sentence-transformer model.
    embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return FAISS.from_texts(chunks, embedder)
# Function to query Groq API
def query_groq_api(query, context, model="llama-3.3-70b-versatile"):
    """Ask the Groq chat-completions API *query*, grounded in *context*.

    Args:
        query: The user's question.
        context: Retrieved document text to ground the answer in.
        model: Groq model identifier.

    Returns:
        str: The model's answer, or an "Error: ..." string on failure.
    """
    # SECURITY: the original hard-coded a live API key here, committing the
    # credential to source control. The key must come from the environment
    # (or st.secrets) — the leaked key should be revoked and rotated.
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        return "Error: GROQ_API_KEY environment variable is not set."
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are an intelligent assistant."},
            {"role": "user", "content": f"Context: {context}\nQuestion: {query}"},
        ],
    }
    try:
        # Timeout keeps the Streamlit UI from hanging on a stalled request.
        response = requests.post(url, headers=headers, json=data, timeout=60)
        response.raise_for_status()  # Raise an error for bad responses
        result = response.json()
        # Defensive extraction: missing keys degrade to "No response."
        return result.get("choices", [{}])[0].get("message", {}).get("content", "No response.")
    except requests.exceptions.RequestException as e:
        # Surface the failure to the UI as a readable string.
        return f"Error: {e}"
# --- Streamlit App ---
st.title("PDF Book Query and Response")  # fixed user-facing typo: "Querry"

# Persistent state so the vector database survives Streamlit reruns.
if "vector_db" not in st.session_state:
    st.session_state.vector_db = None

# Download and index the hardcoded PDF on demand.
if st.button("Process PDF"):
    st.info("Downloading and processing the PDF...")
    pdf_file = download_pdf()
    pdf_text = extract_text_from_pdf(pdf_file)
    st.success("PDF processed successfully!")
    # Create FAISS vector database
    st.info("Creating vector database...")
    st.session_state.vector_db = create_vector_db(pdf_text)
    st.success("Vector database created!")

# Query the document once an index exists.
if st.session_state.vector_db:
    user_query = st.text_input("Ask a question about the document:")
    if st.button("Submit Query"):
        # Guard: a blank query would still hit the embedder and the API.
        if not user_query.strip():
            st.warning("Please enter a question first.")
        else:
            with st.spinner("Processing your query..."):
                # Retrieve the 3 most similar chunks as grounding context.
                similar_docs = st.session_state.vector_db.similarity_search(user_query, k=3)
                context = " ".join(doc.page_content for doc in similar_docs)
                # Send query with context to Groq API
                response = query_groq_api(user_query, context)
            st.write("**Answer:**", response)