# NOTE: "Spaces: Sleeping" — Hugging Face Spaces status banner captured
# when this file was scraped from the Space page; not part of the program.
import os
import re

import numpy as np
import streamlit as st
import torch
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
| # Environment setup | |
| HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") | |
| if not HUGGINGFACEHUB_API_TOKEN: | |
| raise ValueError("HuggingFace API Token is missing.") | |
# Sentence-transformer used to embed both the document chunks and the
# incoming query into the same vector space.
_EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=_EMBED_MODEL)
# --- Document ingestion -------------------------------------------------
# NOTE(review): PyPDFLoader is handed a Google Drive download URL here —
# confirm the installed langchain version fetches remote URLs; some
# versions require a local file path instead.
pdf_url = "https://drive.google.com/uc?id=1XvqA1OIssRs2gbmOtKFKj-02yQ5X2yg0"
loader = PyPDFLoader(pdf_url)
documents = loader.load()

# Chunk the pages so each piece fits in the LLM context window; the
# overlap reduces the chance of cutting an answer across a chunk edge.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)
# --- Retrieval index and generator model --------------------------------
# Dense FAISS index over the chunks; queried by similarity search below.
db = FAISS.from_documents(texts, embeddings)

# Hub-hosted LLM (example model, replace as needed). temperature=0 makes
# generation deterministic; max_length bounds the output sequence.
llm = HuggingFaceHub(
    repo_id="bigscience/bloom",
    model_kwargs={"temperature": 0, "max_length": 512},
)
# --- Prompt and QA chain ------------------------------------------------
# The prompt instructs the model to answer strictly from the retrieved
# context and to admit when the answer is not present.
prompt_template = """
Use the following pieces of context to answer the question at the end.
If the question cannot be answered based on the context, say "I don't know."
Context:
{context}
Question:
{question}
Answer:
"""
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

# "stuff" chain: every retrieved document is inserted into {context}.
qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)
# --- Streamlit frontend -------------------------------------------------
st.title("RAG-based Document Q&A")
# FIX: the app has no upload widget — the document is loaded from a
# fixed URL above — so don't tell the user to upload one.
st.write("Ask questions about the loaded document.")

query = st.text_input("Enter your question:")
if query:
    # Retrieve the 4 chunks most similar to the question.
    docs = db.similarity_search(query, k=4)
    # BUG FIX: a "stuff" QA chain builds {context} from `input_documents`
    # itself; passing a pre-joined "context" string raises a missing-key
    # error at runtime. Hand it the Document objects directly.
    answer = qa_chain.run(input_documents=docs, question=query)
    st.write("**Answer:**", answer)