Spaces:
Sleeping
Sleeping
import os

import requests
import streamlit as st
from groq import Groq
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader
# Helper function to download and load the PDF from Google Drive
def load_pdf_from_drive(
    output_path="downloaded_document.pdf",
    drive_link="https://drive.google.com/file/d/1SzVEuEdKi4dHeKgDrUbmoq1MShB-hyG4/view?usp=drive_link",
):
    """Download a PDF shared via a Google Drive link and save it locally.

    Args:
        output_path: Local file path the PDF is written to.
        drive_link: Google Drive share URL containing a ``/d/<file_id>/``
            segment. Defaults to the app's predefined document.

    Returns:
        The path the PDF was written to (same as ``output_path``).

    Raises:
        requests.HTTPError: If the download request returns an error status.
        IndexError: If ``drive_link`` has no ``/d/<file_id>`` segment.
    """
    # Turn the human "share" URL into a direct-download URL.
    file_id = drive_link.split("/d/")[1].split("/")[0]
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    # timeout= keeps the Streamlit app from hanging forever on a stalled
    # request; raise_for_status() surfaces HTTP errors instead of silently
    # saving an HTML error page as the "PDF".
    response = requests.get(download_url, timeout=60)
    response.raise_for_status()
    with open(output_path, "wb") as f:
        f.write(response.content)
    return output_path
# Helper function to parse the PDF
def load_pdf_content(pdf_path):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        pdf_path: Path to a PDF file on disk.

    Returns:
        The concatenated text of all pages. Pages with no extractable text
        (e.g. scanned images) contribute an empty string.
    """
    reader = PdfReader(pdf_path)
    # BUG FIX: extract_text() may return None for image-only pages, which
    # made the original `text += page.extract_text()` raise TypeError.
    # Coalesce to "" and use join (linear) instead of repeated += (quadratic).
    return "".join(page.extract_text() or "" for page in reader.pages)
# --- Streamlit app: build a FAISS index over the PDF, then answer questions
# --- grounded in retrieved chunks (RAG).
st.title("RAG-Based Application with Groq API")
st.write("Processing a predefined PDF document from Google Drive to create a vector database and interact with it.")
st.write("Downloading and processing the document...")

# Download and load content from the PDF
pdf_path = load_pdf_from_drive()
document_text = load_pdf_content(pdf_path)

# Split the text into overlapping chunks so retrieved passages stay coherent
# across chunk boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200
)
text_chunks = text_splitter.split_text(document_text)
st.write(f"Document split into {len(text_chunks)} chunks.")

# Initialize embedding function
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create FAISS vector database
faiss_index = FAISS.from_texts(text_chunks, embedding=embedding_function)
st.write("Vector database created successfully.")

# Save the FAISS index
faiss_index.save_local("faiss_index")

# SECURITY FIX: the original hard-coded a live Groq API key in source.
# Secrets must come from the environment, never the repository.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY environment variable is not set.")
    st.stop()
client = Groq(api_key=GROQ_API_KEY)

# Chat interaction setup
st.write("Ask a question related to the document:")
user_query = st.text_input("Your question:")
if user_query:
    # BUG FIX: the original sent the raw question straight to the LLM and
    # never queried the vector store, so the app was not actually doing RAG.
    # Retrieve the most similar chunks and pass them as grounding context.
    retrieved_docs = faiss_index.similarity_search(user_query, k=4)
    context = "\n\n".join(doc.page_content for doc in retrieved_docs)
    query_response = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "Answer the user's question using only the provided document context.",
            },
            {
                "role": "user",
                "content": f"Context:\n{context}\n\nQuestion: {user_query}",
            },
        ],
        model="llama-3.3-70b-versatile",
    )
    st.write("Response:")
    st.write(query_response.choices[0].message.content)