Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from langchain import HuggingFacePipeline | |
| from transformers import pipeline | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import Chroma | |
| from langchain.chains import RetrievalQA | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| import os | |
| # --- Load free Hugging Face LLM --- | |
| # You can switch to another free model if you like | |
# Build the text-generation pipeline once at import time; the module-level
# `llm` below is what the QA chain is constructed with.
hf_pipeline = pipeline(
    "text-generation",
    model="tiiuae/falcon-7b-instruct",
    max_new_tokens=256,
    # Sampling has to be switched on explicitly: with greedy decoding the
    # temperature setting has no effect on the generated text.
    do_sample=True,
    temperature=0.5,
)
# Wrap the transformers pipeline so LangChain can drive it as an LLM.
llm = HuggingFacePipeline(pipeline=hf_pipeline)
| # --- PDF Loader --- | |
def document_loader(file):
    """Load a PDF into LangChain documents.

    Args:
        file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            upload object that exposes its path through a ``name``
            attribute.

    Returns:
        The result of ``PyPDFLoader(path).load()`` for the given PDF.

    Raises:
        ValueError: If ``file`` is ``None`` (nothing was provided).
    """
    if file is None:
        raise ValueError("No PDF file was provided.")
    # Accept plain paths as well as upload objects carrying ``.name``.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name
    return PyPDFLoader(path).load()
| # --- Text Splitter --- | |
def text_splitter(documents):
    """Split loaded documents into overlapping chunks.

    Builds a ``RecursiveCharacterTextSplitter`` with a chunk size of 500 and
    an overlap of 50, and returns what its ``split_documents`` produces for
    ``documents``.
    """
    splitter_options = {"chunk_size": 500, "chunk_overlap": 50}
    chunker = RecursiveCharacterTextSplitter(**splitter_options)
    return chunker.split_documents(documents)
| # --- Vector Database using embeddings --- | |
def vector_database(chunks):
    """Embed document chunks and index them in their own Chroma collection.

    Args:
        chunks: Non-empty sequence of documents to embed and index.

    Returns:
        The ``Chroma`` vector store built from ``chunks``.

    Raises:
        ValueError: If ``chunks`` is empty, i.e. there is nothing to index.
    """
    import uuid

    # Fail early with a clear message instead of loading the embedding
    # model and letting the vector store reject an empty batch.
    if not chunks:
        raise ValueError("No text chunks to index.")

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    # Use a unique collection per call. With the default collection name,
    # repeated calls within one process can share a collection, so chunks
    # indexed for an earlier document could be retrieved for a later one.
    collection_name = f"pdf-{uuid.uuid4().hex}"
    return Chroma.from_documents(chunks, embeddings, collection_name=collection_name)
| # --- Build retriever --- | |
def retriever(file):
    """Build a retriever over the contents of an uploaded PDF.

    Loads the file, splits the loaded documents into chunks, indexes the
    chunks in a vector store, and returns that store's retriever.
    """
    documents = document_loader(file)
    store = vector_database(text_splitter(documents))
    return store.as_retriever()
| # --- RetrievalQA --- | |
def retriever_qa(file, query):
    """Answer a question about an uploaded PDF.

    Args:
        file: The uploaded PDF as delivered by the Gradio file input.
        query: The user's question.

    Returns:
        The answer produced by running the RetrievalQA chain on ``query``.

    Raises:
        gr.Error: If no file was uploaded or the question is blank, so the
            problem is reported as a user-facing message rather than
            surfacing later as an unrelated failure.
    """
    # Validate UI input before doing any expensive loading or indexing.
    if file is None:
        raise gr.Error("Please upload a PDF file first.")
    if not query or not query.strip():
        raise gr.Error("Please enter a question about the PDF.")

    # "stuff" places the retrieved chunks directly into a single prompt.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever(file),
        return_source_documents=False,
    )
    return qa_chain.run(query)
| # --- Gradio Interface --- | |
# Input widgets, listed in the order `retriever_qa` takes its arguments.
_pdf_input = gr.File(label="Upload PDF", file_types=['.pdf'])
_question_input = gr.Textbox(
    label="Ask a question about the PDF",
    lines=2,
    placeholder="Type your question here...",
)

# Single-function web UI: (PDF, question) in, answer text out.
app = gr.Interface(
    fn=retriever_qa,
    inputs=[_pdf_input, _question_input],
    outputs=gr.Textbox(label="Answer"),
    title="Free LLM PDF Q&A Bot",
    description="Upload a PDF document and ask any question. This bot uses a free open-source LLM and vector search to answer your questions.",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()