procurement-rules / app.py
Zubair67's picture
Update app.py
6d09556 verified
# app.py
import os
import streamlit as st
from PyPDF2 import PdfReader
import faiss
import requests
from groq import Groq
import tempfile
def download_pdf_from_drive(link):
    """Download a PDF shared via a Google Drive link to a local temp file.

    Parameters
    ----------
    link : str
        A Google Drive share URL containing a ``/d/<file_id>/`` segment.

    Returns
    -------
    str | None
        Path of the downloaded temporary ``.pdf`` file, or ``None`` on any
        failure (an error is reported through ``st.error`` instead of raising).
    """
    try:
        # Share links embed the file id between "/d/" and the next "/";
        # a malformed link would otherwise raise IndexError here.
        file_id = link.split("/d/")[1].split("/")[0]
    except IndexError:
        st.error("Failed to download PDF file from Google Drive.")
        return None
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    try:
        # Timeout so a dead link cannot hang the Streamlit app forever.
        response = requests.get(download_url, timeout=30)
    except requests.RequestException:
        st.error("Failed to download PDF file from Google Drive.")
        return None
    if response.status_code == 200:
        # delete=False keeps the file on disk after close; the caller only
        # needs the path. Writing through the tempfile object directly avoids
        # the original's leaked handle (NamedTemporaryFile + second open()).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f:
            f.write(response.content)
        return f.name
    st.error("Failed to download PDF file from Google Drive.")
    return None
# Groq API Setup
# The key is read from the environment (set GROQ_API_KEY in the deployment).
# NOTE(review): os.environ.get returns None when unset, and the client is
# created at import time — a missing key only surfaces on the first API call.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)
# PDF Data Extraction Function
def extract_text_from_pdf(pdf_path):
    """Return the text of every page in the PDF at *pdf_path*.

    Each page's text is followed by a newline. Pages with no extractable
    text (``extract_text()`` returning ``None``) contribute an empty string
    instead of raising ``TypeError``.

    Parameters
    ----------
    pdf_path : str
        Filesystem path of the PDF to read.

    Returns
    -------
    str
        Concatenated page texts, newline-separated.
    """
    reader = PdfReader(pdf_path)
    # The original appended the two-character literal "\\n" (backslash + n);
    # callers expect a real newline between pages. join also avoids the
    # quadratic string += loop.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)
# FAISS Vector Store Setup
def create_faiss_index(text):
    """Build a FAISS L2 index over the whitespace tokens of *text*.

    NOTE(review): the embeddings are random placeholders — swap in a real
    embedding model before production use; search results are meaningless
    until then.

    Parameters
    ----------
    text : str
        Raw document text; tokenized by splitting on single spaces.

    Returns
    -------
    tuple[faiss.IndexFlatL2, list[str]]
        The populated index and the token list it was built from (the token
        at position i corresponds to index row i).
    """
    import numpy as np

    dim = 128  # placeholder embedding dimensionality
    tokenized = text.split(" ")
    index = faiss.IndexFlatL2(dim)
    if tokenized:
        # One vectorized allocation instead of a Python-level list of
        # per-token arrays; float32 is what FAISS requires.
        index.add(np.random.rand(len(tokenized), dim).astype("float32"))
    return index, tokenized
# Query Function
def query_faiss_index(index, tokenized_text, query, k=5):
    """Return up to *k* tokens nearest the (placeholder) query embedding.

    NOTE(review): the query vector is random until a real embedding model is
    wired in, so results are effectively arbitrary.

    Parameters
    ----------
    index : faiss.Index
        Index built by ``create_faiss_index``.
    tokenized_text : list[str]
        Tokens aligned with the index rows.
    query : str
        User query (currently unused by the placeholder embedding).
    k : int, optional
        Maximum number of neighbours to return (default 5, as before).

    Returns
    -------
    list[str]
        Nearest tokens, possibly fewer than *k*.
    """
    import numpy as np

    # Clamp k: asking FAISS for more neighbours than stored rows pads the
    # result with -1, and tokenized_text[-1] would silently return the
    # wrong token.
    k = min(k, index.ntotal)
    if k <= 0:
        return []
    query_vector = np.random.rand(1, 128).astype("float32")
    distances, indices = index.search(query_vector, k)
    # Drop any residual -1 "no result" slots defensively.
    return [tokenized_text[i] for i in indices[0] if i >= 0]
# Streamlit Frontend
def main():
    """Streamlit page: download a Drive PDF, index it, and answer a query.

    Flow per rerun: read the Drive link and query from text inputs; when both
    are present, download the PDF, extract its text, build the FAISS index,
    show the nearest tokens, and optionally forward the raw query to the
    Groq chat API on button press.
    """
    st.title("RAG-Based Application")
    drive_link = st.text_input("Enter Google Drive PDF Link")
    query = st.text_input("Enter your query")
    if drive_link and query:
        st.write("Downloading PDF from Google Drive...")
        pdf_path = download_pdf_from_drive(drive_link)
        if pdf_path:
            st.write("Extracting data from PDF...")
            text = extract_text_from_pdf(pdf_path)
            st.write("Data extracted successfully!")
            st.write("Creating FAISS index...")
            index, tokenized_text = create_faiss_index(text)
            st.write("Index created successfully!")
            st.write("Querying the index...")
            results = query_faiss_index(index, tokenized_text, query)
            st.write("Results:")
            for result in results:
                st.write(result)
            # NOTE(review): source indentation was lost; this button block is
            # assumed to sit inside `if pdf_path:` — confirm against the
            # original layout. The Groq call sends only the raw query, not
            # the retrieved context.
            if st.button("Ask Groq API"):
                messages = [
                    {
                        "role": "user",
                        "content": query,
                    }
                ]
                chat_completion = client.chat.completions.create(
                    messages=messages, model="llama-3.3-70b-versatile"
                )
                st.write(chat_completion.choices[0].message.content)
if __name__ == "__main__":
    main()