doc-qa-bot / app.py
10tenfirestorm's picture
Update app.py
53a6ab3 verified
import os
import gradio as gr
from langchain_community.document_loaders import WebBaseLoader, PyMuPDFLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFaceHub
from langchain.chains.question_answering import load_qa_chain
# Hugging Face API token, read from the HF_TOKEN environment variable
# (e.g. a configured secret); None when the variable is not set.
hf_token = os.getenv("HF_TOKEN")
def load_pdf(file_path):
    """Load the PDF at ``file_path`` with ``PyMuPDFLoader``.

    Args:
        file_path: Path handed directly to ``PyMuPDFLoader``.

    Returns:
        The result of the loader's ``load()`` call.
    """
    return PyMuPDFLoader(file_path).load()
def load_website(url):
    """Fetch ``url`` with ``WebBaseLoader`` and return what it loads.

    Args:
        url: Address handed directly to ``WebBaseLoader``.

    Returns:
        The result of the loader's ``load()`` call.
    """
    return WebBaseLoader(url).load()
# Embedding model instance shared across calls; created on first use so the
# model is not re-instantiated for every request.
_EMBEDDINGS = None


def setup_vector_store(docs):
    """Build a FAISS vector store over ``docs``.

    The ``sentence-transformers/all-MiniLM-L6-v2`` embedding model is created
    once and reused on later calls.

    Args:
        docs: Documents to index, as returned by the loader helpers.

    Returns:
        The store produced by ``FAISS.from_documents``.

    Raises:
        ValueError: If ``docs`` is empty, since there is nothing to index.
    """
    global _EMBEDDINGS

    # Fail early with a readable message instead of letting the index
    # construction fail on an empty embedding list.
    if not docs:
        raise ValueError("No document content was loaded, so there is nothing to index.")

    if _EMBEDDINGS is None:
        _EMBEDDINGS = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
    return FAISS.from_documents(docs, _EMBEDDINGS)
def ask_question(query, vector_store):
    """Answer ``query`` using documents retrieved from ``vector_store``.

    Retrieves documents for the query through the store's retriever, then runs
    a "stuff" question-answering chain backed by the
    ``mistralai/Mixtral-8x7B-Instruct-v0.1`` model on the Hugging Face Hub.

    Args:
        query: The user's question; must contain non-whitespace text.
        vector_store: A store exposing ``as_retriever()``.

    Returns:
        The chain's ``output_text`` value.

    Raises:
        ValueError: If ``query`` is empty or only whitespace.
    """
    if not query or not query.strip():
        raise ValueError("A non-empty question is required.")

    retriever = vector_store.as_retriever()
    # `invoke` replaces the deprecated `get_relevant_documents`.
    docs = retriever.invoke(query)

    llm = HuggingFaceHub(
        repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
        # NOTE(review): text-generation endpoints commonly expect
        # `max_new_tokens`; confirm that `max_length` is honoured here.
        model_kwargs={"temperature": 0.7, "max_length": 512},
        huggingfacehub_api_token=hf_token,
    )
    chain = load_qa_chain(llm, chain_type="stuff")

    # `invoke` replaces the deprecated `run`; it returns a dict whose
    # "output_text" entry is the value `run` used to return directly.
    result = chain.invoke({"input_documents": docs, "question": query})
    return result["output_text"]
def process_input(weblink, pdf_file, question):
    """Gradio handler: load the given sources and answer ``question``.

    Args:
        weblink: Optional website URL; blank or whitespace-only means "none".
        pdf_file: Optional uploaded PDF, either a filesystem path string or an
            object exposing the path as ``.name``.
        question: The question to answer.

    Returns:
        The answer text, a prompt asking for missing input, or an
        ``"Error: ..."`` string when loading or answering fails.
    """
    weblink = (weblink or "").strip()
    question = (question or "").strip()

    if not weblink and not pdf_file:
        return "Please provide a website link or upload a PDF."
    if not question:
        return "Please enter a question."

    docs = []
    try:
        if weblink:
            docs.extend(load_website(weblink))
        if pdf_file:
            # The upload may arrive as a plain path string or as a
            # file-like wrapper carrying the path in `.name`.
            pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
            docs.extend(load_pdf(pdf_path))
        if not docs:
            return "No readable content was found in the provided source(s)."
        vector_store = setup_vector_store(docs)
        return ask_question(question, vector_store)
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error: {str(e)}"
# Gradio UI: two optional sources (website link, PDF upload) plus a question
# box, all routed to process_input; the answer is shown in a single textbox.
demo = gr.Interface(
    title="Web & PDF QA System",
    fn=process_input,
    inputs=[
        gr.Textbox(label="Website Link (Optional)"),
        gr.File(label="Upload PDF (Optional)"),
        gr.Textbox(label="Ask a Question"),
    ],
    outputs=gr.Textbox(label="Final Answer"),
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()