procurement-rules / app.py
Zubair67's picture
Update app.py
6d09556 verified
# app.py
import os
import streamlit as st
from PyPDF2 import PdfReader
import faiss
import requests
from groq import Groq
import tempfile
def download_pdf_from_drive(link):
    """Download a PDF shared via a Google Drive link to a local temp file.

    Parameters
    ----------
    link : str
        A Google Drive share URL containing a ``/d/<file_id>/`` segment.

    Returns
    -------
    str | None
        Path of the downloaded temporary ``.pdf`` file, or ``None`` on any
        failure (an error is reported through ``st.error`` instead of raising).
    """
    try:
        # Share links embed the file id between "/d/" and the next "/";
        # a malformed link would otherwise raise IndexError here.
        file_id = link.split("/d/")[1].split("/")[0]
    except IndexError:
        st.error("Failed to download PDF file from Google Drive.")
        return None
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    try:
        # Timeout so a dead link cannot hang the Streamlit app forever.
        response = requests.get(download_url, timeout=30)
    except requests.RequestException:
        st.error("Failed to download PDF file from Google Drive.")
        return None
    if response.status_code == 200:
        # delete=False keeps the file on disk after close; the caller only
        # needs the path. Writing through the tempfile object directly avoids
        # the original's leaked handle (NamedTemporaryFile + second open()).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f:
            f.write(response.content)
        return f.name
    st.error("Failed to download PDF file from Google Drive.")
    return None
# Groq API Setup
# The key is read from the environment (set GROQ_API_KEY in the deployment).
# NOTE(review): os.environ.get returns None when unset, and the client is
# created at import time — a missing key only surfaces on the first API call.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)
# PDF Data Extraction Function
def extract_text_from_pdf(pdf_path):
    """Return the text of every page in the PDF at *pdf_path*.

    Each page's text is followed by a newline. Pages with no extractable
    text (``extract_text()`` returning ``None``) contribute an empty string
    instead of raising ``TypeError``.

    Parameters
    ----------
    pdf_path : str
        Filesystem path of the PDF to read.

    Returns
    -------
    str
        Concatenated page texts, newline-separated.
    """
    reader = PdfReader(pdf_path)
    # The original appended the two-character literal "\\n" (backslash + n);
    # callers expect a real newline between pages. join also avoids the
    # quadratic string += loop.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)
# FAISS Vector Store Setup
def create_faiss_index(text):
    """Build a FAISS L2 index over the whitespace tokens of *text*.

    NOTE(review): the embeddings are random placeholders — swap in a real
    embedding model before production use; search results are meaningless
    until then.

    Parameters
    ----------
    text : str
        Raw document text; tokenized by splitting on single spaces.

    Returns
    -------
    tuple[faiss.IndexFlatL2, list[str]]
        The populated index and the token list it was built from (the token
        at position i corresponds to index row i).
    """
    import numpy as np

    dim = 128  # placeholder embedding dimensionality
    tokenized = text.split(" ")
    index = faiss.IndexFlatL2(dim)
    if tokenized:
        # One vectorized allocation instead of a Python-level list of
        # per-token arrays; float32 is what FAISS requires.
        index.add(np.random.rand(len(tokenized), dim).astype("float32"))
    return index, tokenized
# Query Function
def query_faiss_index(index, tokenized_text, query, k=5):
    """Return up to *k* tokens nearest the (placeholder) query embedding.

    NOTE(review): the query vector is random until a real embedding model is
    wired in, so results are effectively arbitrary.

    Parameters
    ----------
    index : faiss.Index
        Index built by ``create_faiss_index``.
    tokenized_text : list[str]
        Tokens aligned with the index rows.
    query : str
        User query (currently unused by the placeholder embedding).
    k : int, optional
        Maximum number of neighbours to return (default 5, as before).

    Returns
    -------
    list[str]
        Nearest tokens, possibly fewer than *k*.
    """
    import numpy as np

    # Clamp k: asking FAISS for more neighbours than stored rows pads the
    # result with -1, and tokenized_text[-1] would silently return the
    # wrong token.
    k = min(k, index.ntotal)
    if k <= 0:
        return []
    query_vector = np.random.rand(1, 128).astype("float32")
    distances, indices = index.search(query_vector, k)
    # Drop any residual -1 "no result" slots defensively.
    return [tokenized_text[i] for i in indices[0] if i >= 0]
# Streamlit Frontend
def main():
    """Streamlit page: download a Drive PDF, index it, and answer a query.

    Flow per rerun: read the Drive link and query from text inputs; when both
    are present, download the PDF, extract its text, build the FAISS index,
    show the nearest tokens, and optionally forward the raw query to the
    Groq chat API on button press.
    """
    st.title("RAG-Based Application")
    drive_link = st.text_input("Enter Google Drive PDF Link")
    query = st.text_input("Enter your query")
    if drive_link and query:
        st.write("Downloading PDF from Google Drive...")
        pdf_path = download_pdf_from_drive(drive_link)
        if pdf_path:
            st.write("Extracting data from PDF...")
            text = extract_text_from_pdf(pdf_path)
            st.write("Data extracted successfully!")
            st.write("Creating FAISS index...")
            index, tokenized_text = create_faiss_index(text)
            st.write("Index created successfully!")
            st.write("Querying the index...")
            results = query_faiss_index(index, tokenized_text, query)
            st.write("Results:")
            for result in results:
                st.write(result)
            # NOTE(review): source indentation was lost; this button block is
            # assumed to sit inside `if pdf_path:` — confirm against the
            # original layout. The Groq call sends only the raw query, not
            # the retrieved context.
            if st.button("Ask Groq API"):
                messages = [
                    {
                        "role": "user",
                        "content": query,
                    }
                ]
                chat_completion = client.chat.completions.create(
                    messages=messages, model="llama-3.3-70b-versatile"
                )
                st.write(chat_completion.choices[0].message.content)
if __name__ == "__main__":
    main()