import gradio as gr
from transformers import pipeline
from langchain_community.llms import HuggingFacePipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
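# Build a local text-generation pipeline around a free, open-weight model.
# Falcon-7B-Instruct is large; on modest hardware a smaller model can be
# swapped in via the `model` argument.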
hf_pipeline = pipeline(
    "text-generation",
    model="tiiuae/falcon-7b-instruct",
    max_new_tokens=256,
    do_sample=True,  # sampling must be enabled for `temperature` to take effect
    temperature=0.5,
)
llm = HuggingFacePipeline(pipeline=hf_pipeline)
|
|
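# Load the uploaded PDF into a list of LangChain Document objects (one per page).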
def document_loader(file):
    # gr.File may hand back a tempfile wrapper or a plain path string,
    # depending on the Gradio version; accept either.
    path = file if isinstance(file, str) else file.name
    loader = PyPDFLoader(path)
    return loader.load()
|
|
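# Split the documents into overlapping chunks for embedding and retrieval.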
def text_splitter(documents):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=20,
    )
    return splitter.split_documents(documents)
|
|
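# Embed the chunks and index them in an in-memory Chroma vector store.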
def vector_database(chunks):
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectordb = Chroma.from_documents(chunks, embeddings)
    return vectordb
|
|
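# Build a retriever over the uploaded file: load -> split -> embed -> index.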
def retriever(file):
    chunks = text_splitter(document_loader(file))
    vectordb = vector_database(chunks)
    return vectordb.as_retriever()
|
|
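# Answer a question with a "stuff" RetrievalQA chain: the retrieved chunks are
# stuffed directly into the prompt. Note the PDF is re-indexed on every query,
# which keeps the demo stateless at the cost of repeating the embedding work.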
def retriever_qa(file, query):
    retriever_obj = retriever(file)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever_obj,
        return_source_documents=False,
    )
    # `invoke` replaces the deprecated `run`; the answer is under "result".
    return qa_chain.invoke({"query": query})["result"]
|
|
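# Wire everything into a simple Gradio UI: a PDF upload plus a question box.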
app = gr.Interface(
    fn=retriever_qa,
    inputs=[
        gr.File(label="Upload PDF", file_types=[".pdf"]),
        gr.Textbox(label="Ask a question about the PDF", lines=2, placeholder="Type your question here..."),
    ],
    outputs=gr.Textbox(label="Answer"),
    title="Free LLM PDF Q&A Bot",
    description="Upload a PDF document and ask any question. This bot uses a free open-source LLM and vector search to answer your questions.",
)
|
|
if __name__ == "__main__":
    app.launch()
|
|
|
|