First_Aid_Kit / app.py
NHZ's picture
Update app.py
f2b5907 verified
raw
history blame
3.89 kB
import os
import requests
import numpy as np
import faiss
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms.base import LLM
from pydantic import Field
from typing import Optional, List
import streamlit as st
# Custom wrapper for Groq API
class GroqLLM(LLM):
    """Minimal LangChain LLM wrapper around Groq's OpenAI-compatible chat API."""

    api_key: str = Field(..., description="API key for Groq")
    model: str = "llama-3.3-70b-versatile"

    @property
    def _llm_type(self) -> str:
        return "groq"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Send *prompt* as a single user message and return the model's reply.

        Raises:
            ValueError: on any non-200 HTTP response from the Groq API.
        """
        headers = {"Authorization": f"Bearer {self.api_key}"}
        json_data = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
        }
        # BUG FIX: Groq serves its OpenAI-compatible API under /openai/v1/,
        # not /v1/ — the previous URL 404'd on every call.
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=json_data,
            timeout=60,  # don't let a stalled request hang the Streamlit app
        )
        if response.status_code != 200:
            raise ValueError(f"Groq API call failed: {response.status_code}, {response.text}")
        data = response.json()
        return data["choices"][0]["message"]["content"]
# Initialize the Groq LLM.
# SECURITY FIX: an API key was previously hard-coded (and therefore published)
# here — that key must be revoked. Read the key from the environment instead;
# on Hugging Face Spaces, set GROQ_API_KEY as a repository secret.
llm = GroqLLM(api_key=os.environ.get("GROQ_API_KEY", ""))
# Function to extract content from a public Google Drive PDF link
def extract_pdf_content(drive_url):
    """Download a publicly shared Google Drive PDF and return its full text.

    Args:
        drive_url: a Drive share link of the form .../file/d/<id>/view...

    Returns:
        The concatenated text of every page, or None when the link is not a
        recognizable share URL or the download fails.
    """
    # Guard the URL parsing: a malformed link previously raised IndexError.
    try:
        file_id = drive_url.split("/d/")[1].split("/view")[0]
    except IndexError:
        return None
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    response = requests.get(download_url, timeout=60)
    if response.status_code != 200:
        return None
    with open("document.pdf", "wb") as f:
        f.write(response.content)
    reader = PdfReader("document.pdf")
    text = ""
    for page in reader.pages:
        # BUG FIX: extract_text() returns None for image-only pages;
        # "text += None" raised TypeError and killed the whole extraction.
        text += page.extract_text() or ""
    return text
# Build a FAISS vector store over sentence-level chunks of the document.
def create_vector_store(text):
    """Split *text* on ". " and index the pieces in a FAISS store.

    Returns:
        (vector_store, chunks): the FAISS store and the raw chunk list.
    """
    chunks = text.split(". ")
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    store = FAISS.from_texts(chunks, embedding=embedder)
    return store, chunks
# Streamlit app: extract the fixed document, index it, and answer queries
# with a retrieval-augmented Groq LLM.
st.title("RAG-based Application with Focused Context")

# Predefined Google Drive link
drive_url = "https://drive.google.com/file/d/1XvqA1OIssRs2gbmOtKFKj-02yQ5X2yg0/view?usp=sharing"

# Extract document content
st.write("Extracting content from the document...")
text = extract_pdf_content(drive_url)
if text:
    st.write("Document extracted successfully!")
    st.write("Creating vector store...")
    vector_store, sentences = create_vector_store(text)
    st.write("Vector store created successfully!")

    query = st.text_input("Enter your query:")
    if query:
        st.write("Retrieving relevant context from the document...")
        retriever = vector_store.as_retriever()
        retriever.search_kwargs["k"] = 3  # Retrieve top 3 matches

        # Define a prompt template to guide LLM response generation
        prompt_template = PromptTemplate(
            template="""
Use the following context to answer the question:
{context}
Question: {question}
Answer:""",
            input_variables=["context", "question"]
        )

        # BUG FIX: prompt_template was built but never handed to the chain,
        # so the LLM ignored it. Pass it through chain_type_kwargs so the
        # "stuff" chain actually formats retrieved context with it.
        qa_chain = RetrievalQA.from_chain_type(
            retriever=retriever,
            llm=llm,
            chain_type="stuff",  # Use the default chain type
            chain_type_kwargs={"prompt": prompt_template},
            return_source_documents=True,  # Optional
        )

        # BUG FIX: chain.run() raises "not exactly one output key" when
        # return_source_documents=True adds a second output; invoke the chain
        # with a dict and read the "result" key instead.
        result = qa_chain({"query": query})
        st.write("Answer:", result["result"])
else:
    st.error("Failed to extract content from the document.")