Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
README.md
CHANGED
|
@@ -5,7 +5,7 @@ emoji: 🔥
|
|
| 5 |
colorFrom: indigo
|
| 6 |
colorTo: indigo
|
| 7 |
sdk: gradio
|
| 8 |
-
sdk_version: 5.
|
| 9 |
app_file: run.py
|
| 10 |
pinned: false
|
| 11 |
hf_oauth: true
|
|
|
|
| 5 |
colorFrom: indigo
|
| 6 |
colorTo: indigo
|
| 7 |
sdk: gradio
|
| 8 |
+
sdk_version: 5.43.0
|
| 9 |
app_file: run.py
|
| 10 |
pinned: false
|
| 11 |
hf_oauth: true
|
run.ipynb
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: highlight_pdf"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio gradio_pdf>=0.0.22 pymupdf>=1.25.3"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/highlight_pdf/Lorem_ipsum.pdf"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from gradio_pdf import PDF\n", "import pymupdf\n", "import os\n", "from pathlib import Path\n", "\n", "current_dir = Path(os.path.abspath(''))\n", "\n", "def highlight_text_in_pdf(pdf_file: Path, highlight_text: str):\n", " page_number = 0\n", " doc = pymupdf.open(pdf_file)\n", " for page in doc:\n", " text_instances = page.search_for(highlight_text)\n", " if len(text_instances) > 0:\n", " page_number = page.number\n", " for inst in text_instances:\n", " page.add_highlight_annot(inst)\n", "\n", " new_pdf_file = str(pdf_file.parents[0]) + \"/new_\" + pdf_file.name\n", " doc.save(new_pdf_file)\n", "\n", " if page_number is None:\n", " page_number = 0\n", " \n", " return new_pdf_file, page_number + 1\n", "\n", "def ask(query): \n", " result = f\"Something about : {query}\"\n", " sources = \"Document 1\"\n", " pdf_path = current_dir / \"Lorem_ipsum.pdf\"\n", " pdf_name = \"Document 1\"\n", " context_to_highlight = \"Ut velit mauris\"\n", "\n", " pdf, page_number = highlight_text_in_pdf(pdf_path, context_to_highlight)\n", " return result, sources + f\" - Page {page_number}\", PDF(pdf, label=pdf_name, starting_page=page_number, interactive=True) # type: ignore\n", "\n", "\n", "if __name__ == \"__main__\":\n", " with gr.Blocks() as demo:\n", " title = gr.HTML(f\"<center><h1>Bot</h1></center>\")\n", " with gr.Row():\n", " with gr.Column(scale=2):\n", " input = gr.Textbox(label=\"Question\", autofocus=True, interactive=True)\n", " btn = gr.Button(\"Ask\", variant=\"primary\")\n", " output = gr.Markdown(label=\"Anwser\")\n", " with gr.Column(scale=2):\n", " srcs = gr.Textbox(label=\"Sources\", interactive=False)\n", " pdf = PDF(label=\"Document\")\n", " \n", " btn.click(fn=ask, inputs=input, outputs=[output, srcs, pdf])\n", "\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
|
|
|
|
| 1 |
+
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: highlight_pdf"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio gradio_pdf>=0.0.22 pymupdf>=1.25.3"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/highlight_pdf/Lorem_ipsum.pdf"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from gradio_pdf import PDF\n", "import pymupdf # type: ignore\n", "import os\n", "from pathlib import Path\n", "\n", "current_dir = Path(os.path.abspath(''))\n", "\n", "def highlight_text_in_pdf(pdf_file: Path, highlight_text: str):\n", " page_number = 0\n", " doc = pymupdf.open(pdf_file)\n", " for page in doc:\n", " text_instances = page.search_for(highlight_text)\n", " if len(text_instances) > 0:\n", " page_number = page.number\n", " for inst in text_instances:\n", " page.add_highlight_annot(inst)\n", "\n", " new_pdf_file = str(pdf_file.parents[0]) + \"/new_\" + pdf_file.name\n", " doc.save(new_pdf_file)\n", "\n", " if page_number is None:\n", " page_number = 0\n", " \n", " return new_pdf_file, page_number + 1\n", "\n", "def ask(query): \n", " result = f\"Something about : {query}\"\n", " sources = \"Document 1\"\n", " pdf_path = current_dir / \"Lorem_ipsum.pdf\"\n", " pdf_name = \"Document 1\"\n", " context_to_highlight = \"Ut velit mauris\"\n", "\n", " pdf, page_number = highlight_text_in_pdf(pdf_path, context_to_highlight)\n", " return result, sources + f\" - Page {page_number}\", PDF(pdf, label=pdf_name, starting_page=page_number, interactive=True) # type: ignore\n", "\n", "\n", "if __name__ == \"__main__\":\n", " with gr.Blocks() as demo:\n", " title = gr.HTML(f\"<center><h1>Bot</h1></center>\")\n", " with gr.Row():\n", " with gr.Column(scale=2):\n", " input = gr.Textbox(label=\"Question\", autofocus=True, interactive=True)\n", " btn = gr.Button(\"Ask\", variant=\"primary\")\n", " output = gr.Markdown(label=\"Anwser\")\n", " with gr.Column(scale=2):\n", " srcs = gr.Textbox(label=\"Sources\", interactive=False)\n", " pdf = PDF(label=\"Document\")\n", " \n", " btn.click(fn=ask, inputs=input, outputs=[output, srcs, pdf])\n", "\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
|
run.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from gradio_pdf import PDF
|
| 3 |
-
import pymupdf
|
| 4 |
import os
|
| 5 |
from pathlib import Path
|
| 6 |
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from gradio_pdf import PDF
|
| 3 |
+
import pymupdf # type: ignore
|
| 4 |
import os
|
| 5 |
from pathlib import Path
|
| 6 |
|