Spaces:
Runtime error
Runtime error
| import os | |
| import streamlit as st | |
| import numpy as np | |
| import faiss | |
| from groq import Groq | |
| from pydrive.auth import GoogleAuth | |
| from pydrive.drive import GoogleDrive | |
| from sentence_transformers import SentenceTransformer | |
# Constants
# Public Google Drive share link to the source PDF used as the RAG corpus.
DRIVE_FILE_LINK = "https://drive.google.com/file/d/1kYGomSibXW-wCFptEMcWP12jOz1390OK/view?usp=drive_link"
# Groq-hosted chat model used to generate the final answer.
GROQ_MODEL = "llama-3.3-70b-versatile"
| # Authentication and setup for Google Drive | |
def load_drive_content(file_link):
    """Download the Google Drive file behind *file_link* to the working dir.

    Runs PyDrive's local-webserver OAuth flow (opens a browser for user
    consent), extracts the file id from the share link, and saves the file
    as ``document.pdf``.

    Returns the local path of the downloaded file.
    """
    auth = GoogleAuth()
    auth.LocalWebserverAuth()  # interactive OAuth; requires a local browser
    # A share link looks like .../file/d/<id>/view?...; pull out <id>.
    file_id = file_link.split('/d/')[1].split('/view')[0]
    drive = GoogleDrive(auth)
    remote_file = drive.CreateFile({'id': file_id})
    local_path = "document.pdf"
    remote_file.GetContentFile(local_path)
    return local_path
| # Chunking and embedding creation | |
def prepare_embeddings(document_path):
    """Read a PDF, chunk its text, and build a FAISS L2 index of embeddings.

    Parameters
    ----------
    document_path : str
        Path to a local PDF file.

    Returns
    -------
    tuple[list[str], faiss.IndexFlatL2]
        The text chunks and a FAISS index over their embeddings; index
        row i corresponds to chunks[i].

    Raises
    ------
    ValueError
        If no text can be extracted from the PDF.
    """
    from PyPDF2 import PdfReader

    reader = PdfReader(document_path)
    # extract_text() can return None for image-only pages; treat those as
    # empty instead of raising a TypeError on string concatenation.
    text = "".join(page.extract_text() or "" for page in reader.pages)
    if not text:
        raise ValueError("No extractable text found in the document.")

    # Sliding-window chunks: 500 chars each, stepping 300 so consecutive
    # chunks overlap by 200 chars and context is not cut mid-sentence.
    chunk_size = 500
    chunk_overlap = 200
    step = chunk_size - chunk_overlap
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), step)]

    # Encode straight to numpy (avoids the tensor detach/numpy round trip,
    # which fails for GPU tensors); FAISS requires contiguous float32.
    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    embeddings = embedder.encode(chunks, convert_to_numpy=True)
    embeddings = np.ascontiguousarray(embeddings, dtype="float32")

    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return chunks, index
| # Groq setup | |
| def groq_client(): | |
| return Groq(api_key=os.environ.get("GROQ_API_KEY")) | |
| # Retrieve and query vector DB | |
| def query_vector_db(query, chunks, index, embedder): | |
| query_embedding = embedder.encode([query], convert_to_tensor=True).detach().numpy() | |
| D, I = index.search(query_embedding, k=1) # Find top result | |
| if I[0][0] != -1: # Valid match | |
| return chunks[I[0][0]] | |
| return "No relevant content found." | |
| # Streamlit application | |
| def main(): | |
| st.title("RAG-based Application with Groq") | |
| # Load document and prepare FAISS | |
| st.info("Loading document and preparing FAISS...") | |
| document_path = load_drive_content(DRIVE_FILE_LINK) | |
| chunks, index = prepare_embeddings(document_path) | |
| embedder = SentenceTransformer("all-MiniLM-L6-v2") | |
| client = groq_client() | |
| # Interface | |
| user_input = st.text_input("Enter your query:") | |
| if user_input: | |
| context = query_vector_db(user_input, chunks, index, embedder) | |
| st.write("**Relevant Context:**", context) | |
| # Query Groq model | |
| with st.spinner("Querying Groq model..."): | |
| chat_completion = client.chat.completions.create( | |
| messages=[ | |
| {"role": "user", "content": f"Based on this context: {context}, {user_input}"} | |
| ], | |
| model=GROQ_MODEL, | |
| ) | |
| st.write("**Groq Model Response:**", chat_completion.choices[0].message.content) | |
| if __name__ == "__main__": | |
| main() | |