# Source: Hugging Face Space by khababakhtar — "Update app.py" (commit 0b21087, verified)
import os
import numpy as np
import faiss
import pytesseract
from pdf2image import convert_from_path
import requests
import streamlit as st
from groq import Groq
# Set up Groq client.
# NOTE(review): if GROQ_API_KEY is unset, os.environ.get returns None and the
# client is built with api_key=None — the failure then surfaces only at call
# time, not here. Consider failing fast; confirm deployment always sets it.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
def extract_text_from_pdf(pdf_path):
    """OCR every page of the PDF at *pdf_path* and return the combined text.

    Pages are rasterized with pdf2image, then each page image is run through
    Tesseract; the per-page strings are concatenated in page order.
    """
    pages = convert_from_path(pdf_path)
    page_texts = [pytesseract.image_to_string(page) for page in pages]
    return "".join(page_texts)
def create_chunks(text, chunk_size=200):
    """Split *text* into whitespace-delimited chunks of at most *chunk_size* words.

    Words are re-joined with single spaces, so runs of whitespace in the
    input are collapsed. An empty/whitespace-only input yields [].
    """
    words = text.split()
    chunks = []
    for start in range(0, len(words), chunk_size):
        chunks.append(" ".join(words[start:start + chunk_size]))
    return chunks
# Function to store chunks in FAISS (GPU when available, CPU otherwise)
def store_chunks_in_faiss(chunks, vector_dim=768):
    """Build a FAISS L2 index with one embedding per chunk.

    Args:
        chunks: list of text chunks; only len(chunks) matters here because
            the embeddings are random placeholders (see NOTE below).
        vector_dim: dimensionality of the embedding vectors (default 768,
            matching the original hard-coded value).

    Returns:
        A populated faiss index — GPU-backed when a GPU build and device
        are present, otherwise a plain CPU IndexFlatL2.
    """
    index = faiss.IndexFlatL2(vector_dim)
    # Bug fix: the original unconditionally called StandardGpuResources(),
    # which raises AttributeError on CPU-only faiss builds. Only migrate the
    # index to GPU 0 when the build actually reports a GPU.
    if faiss.get_num_gpus() > 0:
        res = faiss.StandardGpuResources()
        index = faiss.index_cpu_to_gpu(res, 0, index)
    # NOTE(review): placeholder random embeddings — similarity search over
    # this index is meaningless until real embeddings (e.g. from a sentence
    # encoder producing vector_dim-dimensional vectors) replace them.
    embeddings = np.random.rand(len(chunks), vector_dim).astype("float32")
    index.add(embeddings)
    return index
# Check if FAISS is using GPU
def is_gpu_available():
    """Return True when the FAISS build reports at least one GPU device."""
    gpu_count = faiss.get_num_gpus()
    return gpu_count > 0
# Streamlit app interface
st.title("PDF Content Chunking and Retrieval with FAISS-GPU")

# PDF upload widget; returns None until the user provides a file.
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
if uploaded_file:
    st.write("Processing the uploaded file...")
    # Persist the upload to disk: pdf2image's convert_from_path needs a
    # filesystem path, not an in-memory buffer.
    # NOTE(review): fixed filename means concurrent sessions overwrite each
    # other's upload — confirm single-user deployment or use a temp file.
    with open("uploaded_file.pdf", "wb") as f:
        f.write(uploaded_file.getbuffer())
    # Extract text via OCR and show it for inspection.
    extracted_text = extract_text_from_pdf("uploaded_file.pdf")
    st.text_area("Extracted Text", extracted_text, height=200)
    # Chunk text into fixed-size word groups.
    st.write("Creating chunks...")
    chunks = create_chunks(extracted_text)
    st.write(f"Total chunks created: {len(chunks)}")
    # Store chunks in FAISS (embeddings are placeholders — see
    # store_chunks_in_faiss).
    st.write("Storing chunks in FAISS...")
    index = store_chunks_in_faiss(chunks)
    if is_gpu_available():
        st.success("FAISS is using GPU resources!")
    else:
        st.warning("FAISS is running on CPU.")
    st.write("Chunks successfully stored in the FAISS index!")
# Interaction with Groq
# NOTE(review): the FAISS index built above is never queried here — the user's
# question is sent straight to the LLM with no retrieved context, so this is
# not actual RAG yet. Confirm whether retrieval was intended to be wired in.
user_input = st.text_input("Ask a question about the content:")
if user_input:
    st.write("Sending query to Groq API...")
    # Single-turn chat completion; no conversation history is kept.
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": user_input}],
        model="llama-3.3-70b-versatile"
    )
    st.text_area("Groq API Response", response.choices[0].message.content, height=100)