freddyaboulton HF Staff committed on
Commit
0f3353b
·
verified ·
1 Parent(s): ea590c2

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. run.ipynb +1 -1
  3. run.py +1 -1
README.md CHANGED
@@ -5,7 +5,7 @@ emoji: 🔥
5
  colorFrom: indigo
6
  colorTo: indigo
7
  sdk: gradio
8
- sdk_version: 5.42.0
9
  app_file: run.py
10
  pinned: false
11
  hf_oauth: true
 
5
  colorFrom: indigo
6
  colorTo: indigo
7
  sdk: gradio
8
+ sdk_version: 5.43.0
9
  app_file: run.py
10
  pinned: false
11
  hf_oauth: true
run.ipynb CHANGED
@@ -1 +1 @@
1
- {"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: highlight_pdf"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio gradio_pdf>=0.0.22 pymupdf>=1.25.3"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/highlight_pdf/Lorem_ipsum.pdf"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from gradio_pdf import PDF\n", "import pymupdf\n", "import os\n", "from pathlib import Path\n", "\n", "current_dir = Path(os.path.abspath(''))\n", "\n", "def highlight_text_in_pdf(pdf_file: Path, highlight_text: str):\n", " page_number = 0\n", " doc = pymupdf.open(pdf_file)\n", " for page in doc:\n", " text_instances = page.search_for(highlight_text)\n", " if len(text_instances) > 0:\n", " page_number = page.number\n", " for inst in text_instances:\n", " page.add_highlight_annot(inst)\n", "\n", " new_pdf_file = str(pdf_file.parents[0]) + \"/new_\" + pdf_file.name\n", " doc.save(new_pdf_file)\n", "\n", " if page_number is None:\n", " page_number = 0\n", " \n", " return new_pdf_file, page_number + 1\n", "\n", "def ask(query): \n", " result = f\"Something about : {query}\"\n", " sources = \"Document 1\"\n", " pdf_path = current_dir / \"Lorem_ipsum.pdf\"\n", " pdf_name = \"Document 1\"\n", " context_to_highlight = \"Ut velit mauris\"\n", "\n", " pdf, page_number = highlight_text_in_pdf(pdf_path, context_to_highlight)\n", " return result, sources + f\" - Page {page_number}\", PDF(pdf, label=pdf_name, starting_page=page_number, interactive=True) # type: ignore\n", "\n", "\n", "if 
__name__ == \"__main__\":\n", " with gr.Blocks() as demo:\n", " title = gr.HTML(f\"<center><h1>Bot</h1></center>\")\n", " with gr.Row():\n", " with gr.Column(scale=2):\n", " input = gr.Textbox(label=\"Question\", autofocus=True, interactive=True)\n", " btn = gr.Button(\"Ask\", variant=\"primary\")\n", " output = gr.Markdown(label=\"Anwser\")\n", " with gr.Column(scale=2):\n", " srcs = gr.Textbox(label=\"Sources\", interactive=False)\n", " pdf = PDF(label=\"Document\")\n", " \n", " btn.click(fn=ask, inputs=input, outputs=[output, srcs, pdf])\n", "\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
 
1
+ {"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: highlight_pdf"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio gradio_pdf>=0.0.22 pymupdf>=1.25.3"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/highlight_pdf/Lorem_ipsum.pdf"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from gradio_pdf import PDF\n", "import pymupdf # type: ignore\n", "import os\n", "from pathlib import Path\n", "\n", "current_dir = Path(os.path.abspath(''))\n", "\n", "def highlight_text_in_pdf(pdf_file: Path, highlight_text: str):\n", " page_number = 0\n", " doc = pymupdf.open(pdf_file)\n", " for page in doc:\n", " text_instances = page.search_for(highlight_text)\n", " if len(text_instances) > 0:\n", " page_number = page.number\n", " for inst in text_instances:\n", " page.add_highlight_annot(inst)\n", "\n", " new_pdf_file = str(pdf_file.parents[0]) + \"/new_\" + pdf_file.name\n", " doc.save(new_pdf_file)\n", "\n", " if page_number is None:\n", " page_number = 0\n", " \n", " return new_pdf_file, page_number + 1\n", "\n", "def ask(query): \n", " result = f\"Something about : {query}\"\n", " sources = \"Document 1\"\n", " pdf_path = current_dir / \"Lorem_ipsum.pdf\"\n", " pdf_name = \"Document 1\"\n", " context_to_highlight = \"Ut velit mauris\"\n", "\n", " pdf, page_number = highlight_text_in_pdf(pdf_path, context_to_highlight)\n", " return result, sources + f\" - Page {page_number}\", PDF(pdf, label=pdf_name, starting_page=page_number, interactive=True) # type: ignore\n", 
"\n", "\n", "if __name__ == \"__main__\":\n", " with gr.Blocks() as demo:\n", " title = gr.HTML(f\"<center><h1>Bot</h1></center>\")\n", " with gr.Row():\n", " with gr.Column(scale=2):\n", " input = gr.Textbox(label=\"Question\", autofocus=True, interactive=True)\n", " btn = gr.Button(\"Ask\", variant=\"primary\")\n", " output = gr.Markdown(label=\"Anwser\")\n", " with gr.Column(scale=2):\n", " srcs = gr.Textbox(label=\"Sources\", interactive=False)\n", " pdf = PDF(label=\"Document\")\n", " \n", " btn.click(fn=ask, inputs=input, outputs=[output, srcs, pdf])\n", "\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
run.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  from gradio_pdf import PDF
3
- import pymupdf
4
  import os
5
  from pathlib import Path
6
 
 
1
  import gradio as gr
2
  from gradio_pdf import PDF
3
+ import pymupdf # type: ignore
4
  import os
5
  from pathlib import Path
6