# RAG demo: download a PDF from a Google Drive link, index its text with
# FAISS, and answer user queries via the Groq chat API (Streamlit UI).
# app.py
# Third-party deps: streamlit (UI), PyPDF2 (PDF text extraction),
# faiss (vector similarity search), requests (HTTP download),
# groq (LLM chat API).
import os
import streamlit as st
from PyPDF2 import PdfReader
import faiss
import requests
from groq import Groq
import tempfile
def download_pdf_from_drive(link):
    """Download a PDF behind a Google Drive share link to a temp file.

    Parameters
    ----------
    link : str
        A Drive URL of the form ``.../file/d/<file_id>/view...``.

    Returns
    -------
    str | None
        Path of the downloaded temporary file, or None on failure
        (an error is reported in the Streamlit UI). The caller is
        responsible for deleting the file (it is created with
        ``delete=False``).
    """
    # Guard against links without the "/d/<id>/" segment; the original
    # code raised an unhandled IndexError on malformed input.
    try:
        file_id = link.split("/d/")[1].split("/")[0]
    except IndexError:
        st.error("Failed to download PDF file from Google Drive.")
        return None
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    # A timeout keeps the Streamlit app from hanging forever on a dead host.
    response = requests.get(download_url, timeout=30)
    if response.status_code == 200:
        # Write through the NamedTemporaryFile handle itself; the original
        # re-opened temp_file.name while the first handle was still open.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as f:
            f.write(response.content)
        return f.name
    st.error("Failed to download PDF file from Google Drive.")
    return None
# Groq API Setup
# NOTE(review): the key is read once at import time; if GROQ_API_KEY is
# unset this is None and the client will fail on the first request —
# confirm the deployment environment sets it.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)
# PDF Data Extraction Function
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF.

    Parameters
    ----------
    pdf_path : str
        Path to a PDF file readable by PyPDF2.

    Returns
    -------
    str
        All page texts, each followed by a newline. Pages with no
        extractable text (e.g. scanned images) contribute an empty line.
    """
    reader = PdfReader(pdf_path)
    # extract_text() returns None for image-only pages, which made the
    # original "text += page.extract_text() + ..." raise TypeError; the
    # original also appended the two-character sequence backslash-n
    # ("\\n") instead of a real newline. join() avoids the quadratic
    # string concatenation as well.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)
# FAISS Vector Store Setup
def create_faiss_index(text, dim=128):
    """Build a FAISS L2 index over the whitespace tokens of *text*.

    WARNING: the vectors are random placeholders, not real embeddings,
    so nearest-neighbour results are meaningless until an actual
    embedding model is plugged in.

    Parameters
    ----------
    text : str
        Document text; tokenized naively by splitting on single spaces.
    dim : int, optional
        Embedding dimensionality (default 128, matching the original
        hard-coded value).

    Returns
    -------
    (faiss.IndexFlatL2, list[str])
        The populated index and the token list, index-aligned with the
        stored vectors.
    """
    import numpy as np

    tokens = text.split(" ")
    index = faiss.IndexFlatL2(dim)
    # Generate one (n, dim) float32 matrix directly instead of a Python
    # list of per-token arrays — fewer allocations and the correct 2-D
    # shape even for n == 0.
    vectors = np.random.rand(len(tokens), dim).astype("float32")
    if len(tokens):
        index.add(vectors)
    return index, tokens
# Query Function
def query_faiss_index(index, tokenized_text, query, k=5):
    """Return up to *k* tokens nearest to a (placeholder) query embedding.

    The query vector is random — replace with a real embedding of
    *query* for meaningful retrieval.

    Parameters
    ----------
    index : faiss.Index
        Index built by ``create_faiss_index``.
    tokenized_text : list[str]
        Tokens aligned with the index's stored vectors.
    query : str
        User query (currently unused except by the placeholder).
    k : int, optional
        Maximum number of neighbours to return (default 5, as before).

    Returns
    -------
    list[str]
        The matched tokens; may be shorter than *k*.
    """
    import numpy as np

    query_vector = np.random.rand(128).astype("float32")
    # FAISS pads missing neighbours with index -1 when k > ntotal; the
    # original silently mapped those to tokenized_text[-1]. Clamp k and
    # drop any negative indices instead.
    k = min(k, index.ntotal)
    if k <= 0:
        return []
    distances, indices = index.search(query_vector.reshape(1, -1), k)
    return [tokenized_text[i] for i in indices[0] if i >= 0]
# Streamlit Frontend
def main():
    """Streamlit entry point: download a PDF, index it, answer a query.

    Flow: Drive link + query -> download PDF -> extract text -> build
    FAISS index -> show nearest tokens -> optionally ask the Groq API.
    """
    st.title("RAG-Based Application")
    drive_link = st.text_input("Enter Google Drive PDF Link")
    query = st.text_input("Enter your query")
    # Guard clauses replace the original nested ifs.
    if not (drive_link and query):
        return
    st.write("Downloading PDF from Google Drive...")
    pdf_path = download_pdf_from_drive(drive_link)
    if not pdf_path:
        return
    try:
        st.write("Extracting data from PDF...")
        text = extract_text_from_pdf(pdf_path)
        st.write("Data extracted successfully!")
        st.write("Creating FAISS index...")
        index, tokenized_text = create_faiss_index(text)
        st.write("Index created successfully!")
        st.write("Querying the index...")
        results = query_faiss_index(index, tokenized_text, query)
        st.write("Results:")
        for result in results:
            st.write(result)
    finally:
        # The download helper creates the file with delete=False, so it
        # must be removed here or every query leaks a temp file on disk
        # (the original never deleted it).
        try:
            os.remove(pdf_path)
        except OSError:
            pass
    if st.button("Ask Groq API"):
        messages = [
            {
                "role": "user",
                "content": query,
            }
        ]
        chat_completion = client.chat.completions.create(
            messages=messages, model="llama-3.3-70b-versatile"
        )
        st.write(chat_completion.choices[0].message.content)


if __name__ == "__main__":
    main()