# RAG-style Streamlit app: downloads a PDF from Google Drive, chunks and
# embeds its text, indexes the chunks with FAISS, and answers questions
# via the Groq chat-completions API.
import os

import faiss
import numpy as np
import PyPDF2
import requests
import streamlit as st
from groq import Groq
# Initialize Groq client using the secret environment variable.
# NOTE(review): os.getenv returns None when GROQ_API_KEY is unset; the client
# would then fail on the first request — confirm the deployment sets the secret.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
# Function to download and read PDF content
def extract_text_from_google_drive():
    """Download the source PDF from Google Drive and return its full text.

    The file is saved locally as ``document.pdf`` and then parsed with PyPDF2.

    Returns:
        str: the text of all pages, joined with single spaces.

    Raises:
        requests.HTTPError: if the download does not succeed (e.g. the link
            returns an error page instead of the PDF).
    """
    link = "https://drive.google.com/uc?id=1XvqA1OIssRs2gbmOtKFKj-02yQ5X2yg0"
    # Fail fast on HTTP errors; otherwise an HTML error page would be written
    # to document.pdf and PyPDF2 would fail later with a confusing message.
    response = requests.get(link, timeout=60)
    response.raise_for_status()
    with open("document.pdf", "wb") as file:
        file.write(response.content)
    with open("document.pdf", "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # extract_text() may return None for image-only pages; substitute "".
        text = " ".join(page.extract_text() or "" for page in reader.pages)
    return text
# Function to chunk text
def chunk_text(text, max_length=500):
    """Split *text* into chunks of at most ~max_length characters.

    Splitting happens on ". " sentence boundaries; each sentence keeps its
    trailing period. A sentence longer than max_length becomes its own chunk.

    Args:
        text: the document text to split.
        max_length: soft upper bound on chunk length in characters.

    Returns:
        list[str]: non-empty chunks; [] for empty input.
    """
    sentences = text.split(". ")
    chunks = []
    chunk = ""
    for sentence in sentences:
        # Skip empty fragments (e.g. from a trailing ". ") that would
        # otherwise inject stray ". " separators into the chunks.
        if not sentence:
            continue
        if len(chunk) + len(sentence) <= max_length:
            chunk += sentence + ". "
        else:
            # Guard against appending an empty chunk when the very first
            # sentence already exceeds max_length.
            if chunk:
                chunks.append(chunk.strip())
            chunk = sentence + ". "
    if chunk:
        chunks.append(chunk.strip())
    return chunks
# Function to compute simple embeddings
def compute_embeddings(chunks):
    """Embed each chunk as a fixed 300-dim vector of character code points.

    The first 300 characters are mapped through ord(); shorter texts are
    zero-padded so every vector has the same dimensionality.

    Args:
        chunks: iterable of strings to embed.

    Returns:
        list[list[int]]: one 300-element vector per input string.
    """
    dim = 300

    def _embed(text):
        codes = list(map(ord, text[:dim]))
        return codes + [0] * (dim - len(codes))

    return [_embed(text) for text in chunks]
# Function to create FAISS index
def create_faiss_index(embeddings):
    """Build a flat L2 FAISS index over the given embedding vectors.

    Args:
        embeddings: non-empty list of equal-length numeric vectors.

    Returns:
        faiss.IndexFlatL2: index with all vectors added, ready to search.
    """
    dimension = len(embeddings[0])
    index = faiss.IndexFlatL2(dimension)
    # Bug fix: faiss has no FloatVectorArray — Index.add expects a contiguous
    # float32 numpy matrix of shape (n, dimension).
    index.add(np.array(embeddings, dtype="float32"))
    return index
# Function to query Groq API
def query_groq(question, model_name="llama-3.3-70b-versatile"):
    """Send a single-turn user prompt to the Groq chat API.

    Args:
        question: the prompt text to send as the user message.
        model_name: Groq model identifier to use.

    Returns:
        str: the model's reply text.
    """
    messages = [{"role": "user", "content": question}]
    response = client.chat.completions.create(
        messages=messages,
        model=model_name,
    )
    return response.choices[0].message.content
# Streamlit app
def main():
    """Streamlit entry point: build the FAISS index, then answer questions.

    Downloads the document, chunks and embeds it, and for each user question
    retrieves the single nearest chunk and asks Groq to answer from it.
    """
    st.title("RAG-based Application with Groq API")
    st.subheader("Query the document stored on Google Drive")
    st.write("Extracting text from the document...")
    text = extract_text_from_google_drive()
    st.write("Document text extracted successfully!")
    st.write("Chunking and embedding text...")
    chunks = chunk_text(text)
    embeddings = compute_embeddings(chunks)
    index = create_faiss_index(embeddings)
    st.write(f"Created FAISS index with {len(chunks)} chunks.")
    # Query input
    question = st.text_input("Ask a question based on the document:")
    if question:
        st.write("Searching for relevant chunks...")
        question_embedding = compute_embeddings([question])
        # Bug fix: faiss.FloatVectorArray does not exist — search expects a
        # float32 numpy matrix of shape (1, dim).
        _, indices = index.search(np.array(question_embedding, dtype="float32"), k=1)
        relevant_chunk = chunks[indices[0][0]]
        st.write("Generating answer using Groq API...")
        # Bug fix: previously only the retrieved chunk was sent to the model
        # and the user's question was discarded. Send both as a RAG prompt.
        prompt = (
            "Answer the question using only the context below.\n\n"
            f"Context:\n{relevant_chunk}\n\n"
            f"Question: {question}"
        )
        answer = query_groq(prompt)
        st.write("### Answer:")
        st.write(answer)


if __name__ == "__main__":
    main()