# NOTE(review): the original lines here read "Spaces:" / "Sleeping" / "Sleeping" —
# an extraction artifact (hosted-app status text), not Python code. Preserved as a
# comment so the file parses.
import os
from typing import List, Optional

import faiss
import numpy as np
import requests
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms.base import LLM
from langchain.prompts import PromptTemplate
from langchain.vectorstores import FAISS
from pydantic import Field
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
# Custom LangChain-compatible wrapper for the Groq chat-completions API.
class GroqLLM(LLM):
    """LLM wrapper that sends a prompt to Groq's OpenAI-compatible endpoint.

    Attributes:
        api_key: Groq API key (required).
        model: Groq model identifier to query.
    """

    api_key: str = Field(..., description="API key for Groq")
    model: str = "llama-3.3-70b-versatile"

    @property
    def _llm_type(self) -> str:
        # BUG FIX: LangChain's LLM base declares _llm_type as a property;
        # a plain method breaks the abstract-property contract.
        return "groq"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Send *prompt* as a single user message and return the reply text.

        Raises:
            ValueError: if the HTTP response status is not 200.
        """
        headers = {"Authorization": f"Bearer {self.api_key}"}
        json_data = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
        }
        # BUG FIX: Groq's OpenAI-compatible path is /openai/v1/chat/completions
        # (the bare /v1/... path returns 404). Timeout added so a stalled
        # request cannot hang the Streamlit app indefinitely.
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=json_data,
            timeout=60,
        )
        if response.status_code != 200:
            raise ValueError(f"Groq API call failed: {response.status_code}, {response.text}")
        data = response.json()
        return data["choices"][0]["message"]["content"]
# Initialize the Groq LLM.
# SECURITY FIX: the API key was hard-coded (and therefore leaked) in source.
# Read it from the environment instead: set GROQ_API_KEY before launching.
llm = GroqLLM(api_key=os.environ.get("GROQ_API_KEY", ""))
# Function to extract content from a public Google Drive PDF link
def extract_pdf_content(drive_url):
    """Download a publicly shared Google Drive PDF and return its text.

    Args:
        drive_url: share link of the form
            https://drive.google.com/file/d/<id>/view?...

    Returns:
        The concatenated text of all pages, or None when the URL is
        malformed or the download fails.
    """
    try:
        # BUG FIX: a URL without "/d/" or "/view" used to raise IndexError;
        # treat a malformed link as a failed extraction instead.
        file_id = drive_url.split("/d/")[1].split("/view")[0]
    except IndexError:
        return None
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    response = requests.get(download_url, timeout=60)  # don't hang forever
    if response.status_code != 200:
        return None
    with open("document.pdf", "wb") as f:
        f.write(response.content)
    reader = PdfReader("document.pdf")
    # BUG FIX: extract_text() can return None (e.g. image-only pages); the
    # old `text += page.extract_text()` would raise TypeError. join() also
    # avoids quadratic string concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Function to create a FAISS vector store from the document content
def create_vector_store(text):
    """Split *text* into sentence-ish chunks and index them in FAISS.

    Args:
        text: full document text.

    Returns:
        (vector_store, sentences) — the FAISS store and the chunk list.
    """
    # ROBUSTNESS: drop empty/whitespace-only chunks produced by the naive
    # ". " split; embedding them wastes work and an all-empty list would
    # make FAISS.from_texts fail.
    sentences = [chunk for chunk in text.split(". ") if chunk.strip()]
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_texts(sentences, embedding=embeddings)
    return vector_store, sentences
# ---------------------------------------------------------------------------
# Streamlit app: extract a PDF from Google Drive, index it, answer queries.
# ---------------------------------------------------------------------------
st.title("RAG-based Application with Focused Context")

# Predefined Google Drive link
drive_url = "https://drive.google.com/file/d/1XvqA1OIssRs2gbmOtKFKj-02yQ5X2yg0/view?usp=sharing"

# Extract document content
st.write("Extracting content from the document...")
text = extract_pdf_content(drive_url)

if text:
    st.write("Document extracted successfully!")
    st.write("Creating vector store...")
    vector_store, sentences = create_vector_store(text)
    st.write("Vector store created successfully!")

    query = st.text_input("Enter your query:")
    if query:
        st.write("Retrieving relevant context from the document...")
        # Retrieve the top 3 matches for the query.
        retriever = vector_store.as_retriever(search_kwargs={"k": 3})

        # Prompt template guiding the LLM to answer from the retrieved context.
        prompt_template = PromptTemplate(
            template="""
Use the following context to answer the question:
{context}
Question: {question}
Answer:""",
            input_variables=["context", "question"],
        )

        # BUG FIX: prompt_template was defined but never passed to the chain,
        # so the "stuff" chain silently used its default prompt. Wire it in
        # via chain_type_kwargs.
        qa_chain = RetrievalQA.from_chain_type(
            retriever=retriever,
            llm=llm,
            chain_type="stuff",
            chain_type_kwargs={"prompt": prompt_template},
            return_source_documents=True,
        )

        # BUG FIX: with return_source_documents=True the chain has two output
        # keys ("result" and "source_documents"), so qa_chain.run(query)
        # raises "`run` not supported when there is not exactly one output
        # key". Call the chain with a dict and read the "result" key.
        result = qa_chain({"query": query})
        st.write("Answer:", result["result"])
else:
    st.error("Failed to extract content from the document.")