Spaces:
Build error
Build error
| import os | |
| import faiss | |
| import numpy as np | |
| import streamlit as st | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.vectorstores.faiss import FAISS | |
| from langchain.document_loaders import PyPDFLoader | |
| from sentence_transformers import SentenceTransformer | |
| from groq import Groq | |
# Groq chat client. The API key is read from the GROQ_API_KEY environment
# variable (None when the variable is unset).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

# Open-source sentence-transformer embedding model, loaded once at import time.
embedding_model = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2"
)
# Function to process PDF and create embeddings
def process_pdf(pdf_file, chunk_size=512, chunk_overlap=50, index_path="faiss_index"):
    """Load a PDF, chunk its text, embed the chunks and build a FAISS index.

    Args:
        pdf_file: Path to the PDF, passed straight to ``PyPDFLoader``.
        chunk_size: Maximum chunk length handed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks.
        index_path: File the FAISS index is written to.

    Returns:
        A ``(texts, index)`` tuple: the list of chunk strings and the
        ``faiss.IndexFlatL2`` holding one vector per chunk, in the same order.
        When the PDF yields no text, ``texts`` is empty and the index
        contains no vectors.
    """
    loader = PyPDFLoader(pdf_file)
    documents = loader.load()

    # Chunk text
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    chunks = text_splitter.split_documents(documents)
    texts = [chunk.page_content for chunk in chunks]

    # Generate embeddings
    if texts:
        embeddings = embedding_model.encode(texts, convert_to_numpy=True)
        # FAISS only accepts C-contiguous float32 matrices; this is a no-op
        # when the encoder already returned that layout.
        embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)
        dimension = embeddings.shape[1]
    else:
        # Encoding an empty list gives an array with no second axis, so
        # ``shape[1]`` would raise IndexError (e.g. for image-only PDFs).
        # Ask the model for its output width and build an empty index instead.
        embeddings = None
        dimension = embedding_model.get_sentence_embedding_dimension()

    # Store embeddings in FAISS
    index = faiss.IndexFlatL2(dimension)
    if embeddings is not None:
        index.add(embeddings)
    faiss.write_index(index, index_path)
    return texts, index
# Function to search in FAISS
def search_faiss(query, texts, index, top_k=3):
    """Return the stored chunks nearest to ``query``.

    The query is embedded with the module-level ``embedding_model`` and
    looked up in ``index``. Each returned id is used as a position into
    ``texts``; ids equal to -1 are skipped.

    Args:
        query: The question string to embed.
        texts: Chunk strings, indexed by the ids the index returns.
        index: Object exposing ``search(vectors, k)``.
        top_k: Number of neighbours requested from the index.

    Returns:
        A list of matching chunk strings, in the order the index returned them.
    """
    query_vector = embedding_model.encode([query], convert_to_numpy=True)
    _distances, neighbor_ids = index.search(query_vector, top_k)

    matches = []
    for position in neighbor_ids[0]:
        if position == -1:
            continue
        matches.append(texts[position])
    return matches
# Function to retrieve response from Groq API
def get_groq_response(query):
    """Send ``query`` as a single user message to Groq and return the reply text.

    Args:
        query: Full prompt string sent as the user message content.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    user_message = {"role": "user", "content": query}
    completion = client.chat.completions.create(
        messages=[user_message],
        model="llama-3.3-70b-versatile",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# Streamlit UI
st.title("RAG-Based PDF Chatbot")
uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
if uploaded_file:
    # Streamlit reruns this whole script on every widget interaction, so
    # without caching each typed question would rewrite the PDF and re-embed
    # the entire document. Keep the processed result in the session state and
    # rebuild it only when a different file is uploaded.
    # NOTE(review): the key is (name, size); a re-upload of an edited file with
    # the same name and byte size would reuse the old index.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("indexed_file_key") != file_key:
        with open("uploaded_document.pdf", "wb") as f:
            f.write(uploaded_file.getbuffer())
        texts, index = process_pdf("uploaded_document.pdf")
        st.session_state["indexed_texts"] = texts
        st.session_state["indexed_index"] = index
        # Set the key last so a failure during processing is retried on the
        # next rerun instead of leaving a half-populated cache.
        st.session_state["indexed_file_key"] = file_key
    st.success("PDF uploaded successfully!")

    texts = st.session_state["indexed_texts"]
    index = st.session_state["indexed_index"]

    query = st.text_input("Ask a question about the document:")
    if query:
        # Retrieve the closest chunks and pass them to the LLM as context.
        docs = search_faiss(query, texts, index)
        context = " ".join(docs)
        final_query = f"Context: {context} \n\n Question: {query}"
        response = get_groq_response(final_query)
        st.write("Response:", response)