First_Aid_Kit / app.py
NHZ's picture
Update app.py
f2b5907 verified
raw
history blame
3.89 kB
import os
import requests
import numpy as np
import faiss
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms.base import LLM
from pydantic import Field
from typing import Optional, List
import streamlit as st
# Custom wrapper for Groq API
class GroqLLM(LLM):
    """Minimal LangChain LLM wrapper around Groq's OpenAI-compatible chat API."""

    api_key: str = Field(..., description="API key for Groq")
    model: str = "llama-3.3-70b-versatile"

    @property
    def _llm_type(self) -> str:
        return "groq"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Send *prompt* as a single user message and return the model's reply.

        Raises:
            ValueError: on any non-200 HTTP response from the Groq API.
        """
        headers = {"Authorization": f"Bearer {self.api_key}"}
        json_data = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
        }
        # BUG FIX: Groq serves its OpenAI-compatible API under /openai/v1/,
        # not /v1/ — the previous URL 404'd on every call.
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers=headers,
            json=json_data,
            timeout=60,  # don't let a stalled request hang the Streamlit app
        )
        if response.status_code != 200:
            raise ValueError(f"Groq API call failed: {response.status_code}, {response.text}")
        data = response.json()
        return data["choices"][0]["message"]["content"]
# Initialize the Groq LLM.
# SECURITY FIX: an API key was previously hard-coded (and therefore published)
# here — that key must be revoked. Read the key from the environment instead;
# on Hugging Face Spaces, set GROQ_API_KEY as a repository secret.
llm = GroqLLM(api_key=os.environ.get("GROQ_API_KEY", ""))
# Function to extract content from a public Google Drive PDF link
def extract_pdf_content(drive_url):
    """Download a publicly shared Google Drive PDF and return its full text.

    Args:
        drive_url: a Drive share link of the form .../file/d/<id>/view...

    Returns:
        The concatenated text of every page, or None when the link is not a
        recognizable share URL or the download fails.
    """
    # Guard the URL parsing: a malformed link previously raised IndexError.
    try:
        file_id = drive_url.split("/d/")[1].split("/view")[0]
    except IndexError:
        return None
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    response = requests.get(download_url, timeout=60)
    if response.status_code != 200:
        return None
    with open("document.pdf", "wb") as f:
        f.write(response.content)
    reader = PdfReader("document.pdf")
    text = ""
    for page in reader.pages:
        # BUG FIX: extract_text() returns None for image-only pages;
        # "text += None" raised TypeError and killed the whole extraction.
        text += page.extract_text() or ""
    return text
# Build a FAISS vector store over sentence-level chunks of the document.
def create_vector_store(text):
    """Split *text* on ". " and index the pieces in a FAISS store.

    Returns:
        (vector_store, chunks): the FAISS store and the raw chunk list.
    """
    chunks = text.split(". ")
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    store = FAISS.from_texts(chunks, embedding=embedder)
    return store, chunks
# Streamlit app: extract the fixed document, index it, and answer queries
# with a retrieval-augmented Groq LLM.
st.title("RAG-based Application with Focused Context")

# Predefined Google Drive link
drive_url = "https://drive.google.com/file/d/1XvqA1OIssRs2gbmOtKFKj-02yQ5X2yg0/view?usp=sharing"

# Extract document content
st.write("Extracting content from the document...")
text = extract_pdf_content(drive_url)
if text:
    st.write("Document extracted successfully!")
    st.write("Creating vector store...")
    vector_store, sentences = create_vector_store(text)
    st.write("Vector store created successfully!")

    query = st.text_input("Enter your query:")
    if query:
        st.write("Retrieving relevant context from the document...")
        retriever = vector_store.as_retriever()
        retriever.search_kwargs["k"] = 3  # Retrieve top 3 matches

        # Define a prompt template to guide LLM response generation
        prompt_template = PromptTemplate(
            template="""
Use the following context to answer the question:
{context}
Question: {question}
Answer:""",
            input_variables=["context", "question"]
        )

        # BUG FIX: prompt_template was built but never handed to the chain,
        # so the LLM ignored it. Pass it through chain_type_kwargs so the
        # "stuff" chain actually formats retrieved context with it.
        qa_chain = RetrievalQA.from_chain_type(
            retriever=retriever,
            llm=llm,
            chain_type="stuff",  # Use the default chain type
            chain_type_kwargs={"prompt": prompt_template},
            return_source_documents=True,  # Optional
        )

        # BUG FIX: chain.run() raises "not exactly one output key" when
        # return_source_documents=True adds a second output; invoke the chain
        # with a dict and read the "result" key instead.
        result = qa_chain({"query": query})
        st.write("Answer:", result["result"])
else:
    st.error("Failed to extract content from the document.")