Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- Lorem_ipsum.pdf +0 -0
- README.md +7 -7
- requirements.txt +2 -0
- run.ipynb +1 -0
- run.py +52 -0
Lorem_ipsum.pdf
ADDED
|
Binary file (42.3 kB). View file
|
|
|
README.md
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.20.1
|
| 8 |
-
app_file:
|
| 9 |
pinned: false
|
|
|
|
| 10 |
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
+
|
| 2 |
---
|
| 3 |
+
title: highlight_pdf
|
| 4 |
+
emoji: 🔥
|
| 5 |
+
colorFrom: indigo
|
| 6 |
+
colorTo: indigo
|
| 7 |
sdk: gradio
|
| 8 |
sdk_version: 5.20.1
|
| 9 |
+
app_file: run.py
|
| 10 |
pinned: false
|
| 11 |
+
hf_oauth: true
|
| 12 |
---
|
|
|
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio_pdf>=0.0.22
|
| 2 |
+
pymupdf>=1.25.3
|
run.ipynb
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: highlight_pdf"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio gradio_pdf>=0.0.22 pymupdf>=1.25.3"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/highlight_pdf/Lorem_ipsum.pdf"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from gradio_pdf import PDF\n", "import pymupdf\n", "import os\n", "from pathlib import Path\n", "\n", "current_dir = Path(os.path.abspath(''))\n", "\n", "def highlight_text_in_pdf(pdf_file: Path, highlight_text: str):\n", " page_number = 0\n", " doc = pymupdf.open(pdf_file)\n", " for page in doc:\n", " text_instances = page.search_for(highlight_text)\n", " if len(text_instances) > 0:\n", " page_number = page.number\n", " for inst in text_instances:\n", " page.add_highlight_annot(inst)\n", "\n", " new_pdf_file = str(pdf_file.parents[0]) + \"/new_\" + pdf_file.name\n", " doc.save(new_pdf_file)\n", "\n", " if page_number is None:\n", " page_number = 0\n", " \n", " return new_pdf_file, page_number + 1\n", "\n", "def ask(query): \n", " result = f\"Something about : {query}\"\n", " sources = \"Document 1\"\n", " pdf_path = current_dir / \"Lorem_ipsum.pdf\"\n", " pdf_name = \"Document 1\"\n", " context_to_highlight = \"Ut velit mauris\"\n", "\n", " pdf, page_number = highlight_text_in_pdf(pdf_path, context_to_highlight)\n", " return result, sources + f\" - Page {page_number}\", PDF(pdf, label=pdf_name, starting_page=page_number, interactive=True)\n", "\n", "\n", "if __name__ == 
\"__main__\":\n", " with gr.Blocks() as demo:\n", " title = gr.HTML(f\"<center><h1>Bot</h1></center>\")\n", " with gr.Row():\n", " with gr.Column(scale=2):\n", " input = gr.Textbox(label=\"Question\", autofocus=True, interactive=True)\n", " btn = gr.Button(\"Ask\", variant=\"primary\")\n", " output = gr.Markdown(label=\"Anwser\")\n", " with gr.Column(scale=2):\n", " srcs = gr.Textbox(label=\"Sources\", interactive=False)\n", " pdf = PDF(label=\"Document\")\n", " \n", " btn.click(fn=ask, inputs=input, outputs=[output, srcs, pdf])\n", "\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
|
run.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from gradio_pdf import PDF
|
| 3 |
+
import pymupdf
|
| 4 |
+
import os
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
current_dir = Path(os.path.abspath(''))
|
| 8 |
+
|
| 9 |
+
def highlight_text_in_pdf(pdf_file: Path, highlight_text: str):
    """Highlight every occurrence of ``highlight_text`` and save an annotated copy.

    Each page of ``pdf_file`` is searched for ``highlight_text``; every hit
    gets a highlight annotation. The annotated document is written next to
    the original as ``new_<original name>``; the original file is not
    overwritten.

    Args:
        pdf_file: Path of the PDF to search.
        highlight_text: Text to search for on each page.

    Returns:
        A ``(new_pdf_file, page)`` tuple where ``new_pdf_file`` is the path
        of the annotated copy as a string and ``page`` is ``page.number + 1``
        for the last page that contained a match, or ``1`` when nothing
        matched.
    """
    # Falls back to the first page when the text is not found anywhere.
    page_number = 0

    # The context manager closes the document even if searching or saving
    # raises, instead of leaving the file handle open.
    with pymupdf.open(pdf_file) as doc:
        for page in doc:
            text_instances = page.search_for(highlight_text)
            if text_instances:
                # Overwritten on every matching page, so the last match wins.
                page_number = page.number
            for inst in text_instances:
                page.add_highlight_annot(inst)

        # Callers receive a plain string path, so convert after building it.
        new_pdf_file = str(pdf_file.with_name("new_" + pdf_file.name))
        doc.save(new_pdf_file)

    return new_pdf_file, page_number + 1
|
| 26 |
+
|
| 27 |
+
def ask(query):
    """Build the demo response for ``query``.

    The answer text simply echoes the query. The source document, its
    display name and the passage to highlight are fixed values; the passage
    is highlighted via ``highlight_text_in_pdf`` and the resulting file is
    wrapped in a ``PDF`` component opened at the reported page.

    Returns:
        A ``(answer, sources, pdf_component)`` tuple matching the three
        outputs wired to the "Ask" button.
    """
    answer = f"Something about : {query}"
    sources = "Document 1"
    document_label = "Document 1"
    passage = "Ut velit mauris"
    source_pdf = current_dir / "Lorem_ipsum.pdf"

    highlighted_pdf, page_number = highlight_text_in_pdf(source_pdf, passage)

    citation = f"{sources} - Page {page_number}"
    viewer = PDF(
        highlighted_pdf,
        label=document_label,
        starting_page=page_number,
        interactive=True,
    )
    return answer, citation, viewer
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
if __name__ == "__main__":
    # Two-column layout: question and answer on the left, the cited source
    # and the highlighted PDF on the right.
    with gr.Blocks() as demo:
        gr.HTML("<center><h1>Bot</h1></center>")
        with gr.Row():
            with gr.Column(scale=2):
                # Named `question_box` rather than `input` so the builtin
                # is not shadowed.
                question_box = gr.Textbox(
                    label="Question", autofocus=True, interactive=True
                )
                btn = gr.Button("Ask", variant="primary")
                output = gr.Markdown(label="Answer")
            with gr.Column(scale=2):
                srcs = gr.Textbox(label="Sources", interactive=False)
                pdf = PDF(label="Document")

        # `ask` returns (answer, sources, pdf) in the same order as `outputs`.
        btn.click(fn=ask, inputs=question_box, outputs=[output, srcs, pdf])

    demo.launch()
|