| import gradio as gr |
| from huggingface_hub import InferenceClient |
| from langchain.chains import ConversationalRetrievalChain |
| from langchain.vectorstores import FAISS |
| from langchain.embeddings import HuggingFaceEmbeddings |
| from langchain.text_splitter import RecursiveCharacterTextSplitter |
| from langchain.document_loaders import PyPDFLoader |
| import tempfile |
|
|
| |
# Module-level state shared between the Gradio event handlers.
# Both are assigned in process_pdf(); respond() reads retrieval_chain
# and refuses to answer until it has been populated.
vectorstore = None  # FAISS index built from the most recently uploaded PDF
retrieval_chain = None  # ConversationalRetrievalChain over that index
|
|
|
|
def process_pdf(file):
    """Index an uploaded PDF and build the conversational retrieval chain.

    Splits the document into overlapping chunks, embeds them into a FAISS
    vector store, and stores a ConversationalRetrievalChain in the module
    globals so ``respond`` can answer questions against it.

    Args:
        file: Gradio file object for the uploaded PDF (``file.name`` is the
            path of the temp file Gradio wrote to disk). May be ``None`` if
            the upload event fires without a file.

    Returns:
        A human-readable status string shown to the user.
    """
    global vectorstore, retrieval_chain

    # Guard: the upload callback can fire with no file selected.
    if file is None:
        return "No file received. Please upload a PDF."

    # Gradio has already persisted the upload to a temp path.
    loader = PyPDFLoader(file.name)
    documents = loader.load()

    # Overlapping chunks so answers spanning a chunk boundary stay retrievable.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = text_splitter.split_documents(documents)

    embeddings = HuggingFaceEmbeddings()
    vectorstore = FAISS.from_documents(docs, embeddings)

    # Configure top-k at construction time rather than mutating
    # retriever.search_kwargs after the fact.
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    # NOTE(review): InferenceClient is a raw huggingface_hub client, not a
    # LangChain LLM wrapper; ConversationalRetrievalChain.from_llm expects a
    # LangChain LLM/Runnable (e.g. HuggingFaceEndpoint). Confirm this works
    # with the installed langchain version before shipping.
    client = InferenceClient(model="deepseek-ai/DeepSeek-R1-0528")

    retrieval_chain = ConversationalRetrievalChain.from_llm(
        llm=client,
        retriever=retriever,
        return_source_documents=True,
    )

    return "PDF processed. You can now ask questions!"
|
|
|
|
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """Answer a chat message using the retrieval chain built by process_pdf.

    Args:
        message: The user's current question.
        history: Prior turns in Gradio "messages" format — a list of
            ``{"role": ..., "content": ...}`` dicts.
        system_message, max_tokens, temperature, top_p: Extra UI controls;
            currently unused by the chain (accepted to keep the
            ChatInterface wiring intact).
        hf_token: OAuth token injected by Gradio's login flow; unused here.

    Returns:
        The chain's answer string, or a prompt to upload a PDF first.
    """
    global retrieval_chain

    if retrieval_chain is None:
        return "Please upload a PDF first."

    # Pair each user turn with the assistant reply that follows it.
    # The previous code read h["response"], a key that never exists in the
    # "messages" history format, so all prior answers were silently dropped.
    chat_history = []
    pending_question = None
    for entry in history:
        if entry["role"] == "user":
            pending_question = entry["content"]
        elif entry["role"] == "assistant" and pending_question is not None:
            chat_history.append((pending_question, entry["content"]))
            pending_question = None

    result = retrieval_chain({"question": message, "chat_history": chat_history})

    return result["answer"]
|
|
|
|
# Extra UI controls; Gradio forwards their values to ``respond`` in order,
# after the message and history arguments.
_extra_controls = [
    gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
    ),
]

# Chat widget wired to ``respond``; history is kept in "messages" format.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=_extra_controls,
)
|
|
# Page layout: upload controls in a sidebar, chat widget in the main area.
with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.LoginButton()
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        # Surface process_pdf's return value. The old wiring used
        # outputs=[], which silently discarded the status/error string.
        pdf_status = gr.Markdown()
        pdf_upload.upload(process_pdf, inputs=pdf_upload, outputs=pdf_status)
    chatbot.render()
|
|
|
|
def _main() -> None:
    """Launch the Gradio app with default server settings."""
    demo.launch()


if __name__ == "__main__":
    _main()
|
|