Upload folder using huggingface_hub
Browse files
- requirements.txt +2 -1
- src/__pycache__/gradio_app.cpython-39.pyc +0 -0
- src/__pycache__/summarization.cpython-39.pyc +0 -0
- src/gradio_app.py +42 -55
- src/mailing.py +0 -0
- src/summarization.py +44 -0
requirements.txt
CHANGED
|
@@ -9,4 +9,5 @@ grpcio-tools==1.54.2
|
|
| 9 |
gpt_index==0.4.24
|
| 10 |
langchain==0.0.190
|
| 11 |
environs==9.5.0
|
| 12 |
-
pypdf==3.9.1
|
|
|
|
|
|
| 9 |
gpt_index==0.4.24
|
| 10 |
langchain==0.0.190
|
| 11 |
environs==9.5.0
|
| 12 |
+
pypdf==3.9.1
|
| 13 |
+
pypdfium2==4.18.0
|
src/__pycache__/gradio_app.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/gradio_app.cpython-39.pyc and b/src/__pycache__/gradio_app.cpython-39.pyc differ
|
|
|
src/__pycache__/summarization.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/summarization.cpython-39.pyc and b/src/__pycache__/summarization.cpython-39.pyc differ
|
|
|
src/gradio_app.py
CHANGED
|
@@ -1,9 +1,32 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from langchain.chat_models import ChatOpenAI
|
| 3 |
-
from src.summarization import
|
| 4 |
-
from src.prompts import prompts
|
| 5 |
import os
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
def switch_buttons(interactive: bool):
|
| 9 |
"""This switches the buttons to interactive or not interactive.
|
|
@@ -24,49 +47,6 @@ def switch_buttons(interactive: bool):
|
|
| 24 |
)
|
| 25 |
|
| 26 |
|
| 27 |
-
def summarize_wrapper(
|
| 28 |
-
file: str, llm: ChatOpenAI, summarization_type: str, summarization_kwargs: dict
|
| 29 |
-
) -> str:
|
| 30 |
-
"""Wrapper for the summarization function to make it compatible with gradio.
|
| 31 |
-
|
| 32 |
-
Args:
|
| 33 |
-
file (str): Path to the file. This can either be a local path or a tempfile._TemporaryFileWrapper.
|
| 34 |
-
llm (ChatOpenAI): Language model.
|
| 35 |
-
summarization_type (str): Type of summarization. Can be either "short", "middle" or "long".
|
| 36 |
-
summarization_kwargs (dict): Keyword arguments for the summarization.
|
| 37 |
-
|
| 38 |
-
Returns:
|
| 39 |
-
str: Summarization of the file.
|
| 40 |
-
"""
|
| 41 |
-
if summarization_type == "short":
|
| 42 |
-
summarization_kwargs.update(
|
| 43 |
-
dict(
|
| 44 |
-
map_prompt=prompts["short_de"]["map_prompt"],
|
| 45 |
-
combine_prompt=prompts["short_de"]["combine_prompt"],
|
| 46 |
-
)
|
| 47 |
-
)
|
| 48 |
-
elif summarization_type == "middle":
|
| 49 |
-
summarization_kwargs.update(
|
| 50 |
-
dict(
|
| 51 |
-
map_prompt=prompts["middle_de"]["map_prompt"],
|
| 52 |
-
combine_prompt=prompts["middle_de"]["combine_prompt"],
|
| 53 |
-
)
|
| 54 |
-
)
|
| 55 |
-
elif summarization_type == "long":
|
| 56 |
-
summarization_kwargs.update(
|
| 57 |
-
dict(
|
| 58 |
-
map_prompt=prompts["long_de"]["map_prompt"],
|
| 59 |
-
combine_prompt=prompts["long_de"]["combine_prompt"],
|
| 60 |
-
)
|
| 61 |
-
)
|
| 62 |
-
else:
|
| 63 |
-
raise ValueError(f"Summarization type {summarization_type} is not supported.")
|
| 64 |
-
|
| 65 |
-
return summarize(
|
| 66 |
-
file_path=file.name, llm=llm[0], summarization_kwargs=summarization_kwargs
|
| 67 |
-
)
|
| 68 |
-
|
| 69 |
-
|
| 70 |
def run_summarization_model_gradio(
|
| 71 |
llm: ChatOpenAI,
|
| 72 |
share_gradio_via_link: bool = False,
|
|
@@ -91,29 +71,36 @@ def run_summarization_model_gradio(
|
|
| 91 |
) as webui:
|
| 92 |
with gr.Row().style(equal_height=True):
|
| 93 |
Header_box = generate_title(title=title, description=description)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
with gr.Row().style(equal_height=True):
|
| 95 |
summary_short = gr.Button("Kurze Zusammenfassung", interactive=False)
|
| 96 |
summary_middle = gr.Button("Mittlere Zusammenfassung", interactive=False)
|
| 97 |
summary_long = gr.Button("Lange Zusammenfassung", interactive=False)
|
| 98 |
with gr.Row().style(equal_height=True):
|
| 99 |
with gr.Column(scale=1):
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
)
|
| 105 |
-
with gr.Column(scale=
|
| 106 |
-
|
| 107 |
-
|
| 108 |
)
|
| 109 |
|
| 110 |
-
# Once a file is uploaded, enable the summarization buttons
|
| 111 |
file_upload.upload(
|
| 112 |
switch_buttons,
|
| 113 |
[gr.State(True)],
|
| 114 |
[summary_short, summary_middle, summary_long],
|
| 115 |
queue=False,
|
| 116 |
-
)
|
| 117 |
|
| 118 |
# If you click any button, first disable all buttons, then summarize, and then enable the clicked button
|
| 119 |
for s, summarization_type in [
|
|
@@ -144,7 +131,6 @@ def run_summarization_model_gradio(
|
|
| 144 |
)
|
| 145 |
|
| 146 |
# The clear button clears the dashboard
|
| 147 |
-
clear = gr.Button("Clear")
|
| 148 |
clear.click(lambda: None, None, summary_output, queue=False).then(
|
| 149 |
lambda: None, None, file_upload, queue=False
|
| 150 |
).then(
|
|
@@ -167,6 +153,7 @@ def run_summarization_model_gradio(
|
|
| 167 |
|
| 168 |
|
| 169 |
def generate_title(title: str, description: str):
|
|
|
|
| 170 |
return gr.HTML(
|
| 171 |
(
|
| 172 |
"""
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from langchain.chat_models import ChatOpenAI
|
| 3 |
+
from src.summarization import summarize_wrapper
|
|
|
|
| 4 |
import os
|
| 5 |
|
| 6 |
+
import pypdfium2 as pdfium
|
| 7 |
+
from PIL import Image
|
| 8 |
+
|
| 9 |
+
# Function to render the first page of a PDF file as an image
|
| 10 |
+
def render_file(file):
|
| 11 |
+
pdf = pdfium.PdfDocument(file.name)
|
| 12 |
+
|
| 13 |
+
page = pdf.get_page(0)
|
| 14 |
+
bitmap = page.render(
|
| 15 |
+
scale=300 / 72, # 300dpi resolution
|
| 16 |
+
rotation=0, # no additional rotation
|
| 17 |
+
# ... further rendering options
|
| 18 |
+
)
|
| 19 |
+
pil_image = bitmap.to_pil()
|
| 20 |
+
|
| 21 |
+
# WORK FROM HERE TO RENDER THE COMPLETE PDF
|
| 22 |
+
# bitmap = pdf.render(
|
| 23 |
+
# pdfium.PdfBitmap.to_pil,
|
| 24 |
+
# page_indices=[0, 1, 2],
|
| 25 |
+
# scale=300 / 72, # 300dpi resolution
|
| 26 |
+
# )
|
| 27 |
+
# pil_image = bitmap.to_pil()
|
| 28 |
+
return pil_image
|
| 29 |
+
|
| 30 |
|
| 31 |
def switch_buttons(interactive: bool):
|
| 32 |
"""This switches the buttons to interactive or not interactive.
|
|
|
|
| 47 |
)
|
| 48 |
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
def run_summarization_model_gradio(
|
| 51 |
llm: ChatOpenAI,
|
| 52 |
share_gradio_via_link: bool = False,
|
|
|
|
| 71 |
) as webui:
|
| 72 |
with gr.Row().style(equal_height=True):
|
| 73 |
Header_box = generate_title(title=title, description=description)
|
| 74 |
+
with gr.Row().style(equal_height=True):
|
| 75 |
+
clear = gr.Button("Clear")
|
| 76 |
+
file_upload = gr.File(
|
| 77 |
+
file_count="single",
|
| 78 |
+
file_types=[".pdf", ".txt"],
|
| 79 |
+
label="Upload PDF",
|
| 80 |
+
)
|
| 81 |
with gr.Row().style(equal_height=True):
|
| 82 |
summary_short = gr.Button("Kurze Zusammenfassung", interactive=False)
|
| 83 |
summary_middle = gr.Button("Mittlere Zusammenfassung", interactive=False)
|
| 84 |
summary_long = gr.Button("Lange Zusammenfassung", interactive=False)
|
| 85 |
with gr.Row().style(equal_height=True):
|
| 86 |
with gr.Column(scale=1):
|
| 87 |
+
summary_output = (
|
| 88 |
+
gr.Textbox(label="Zusammenfassung")
|
| 89 |
+
.style(show_copy_button=True)
|
| 90 |
+
.style(height=680)
|
| 91 |
)
|
| 92 |
+
with gr.Column(scale=1):
|
| 93 |
+
show_img = gr.Image(label="Uploaded PDF", tool="select").style(
|
| 94 |
+
height=680
|
| 95 |
)
|
| 96 |
|
| 97 |
+
# Once a file is uploaded, enable the summarization buttons and visualize the uploaded file
|
| 98 |
file_upload.upload(
|
| 99 |
switch_buttons,
|
| 100 |
[gr.State(True)],
|
| 101 |
[summary_short, summary_middle, summary_long],
|
| 102 |
queue=False,
|
| 103 |
+
).then(fn=render_file, inputs=[file_upload], outputs=[show_img])
|
| 104 |
|
| 105 |
# If you click any button, first disable all buttons, then summarize, and then enable the clicked button
|
| 106 |
for s, summarization_type in [
|
|
|
|
| 131 |
)
|
| 132 |
|
| 133 |
# The clear button clears the dashboard
|
|
|
|
| 134 |
clear.click(lambda: None, None, summary_output, queue=False).then(
|
| 135 |
lambda: None, None, file_upload, queue=False
|
| 136 |
).then(
|
|
|
|
| 153 |
|
| 154 |
|
| 155 |
def generate_title(title: str, description: str):
|
| 156 |
+
"""THIS IS AN EXAMPLE HTML CODE FOR A TITLE"""
|
| 157 |
return gr.HTML(
|
| 158 |
(
|
| 159 |
"""
|
src/mailing.py
ADDED
|
File without changes
|
src/summarization.py
CHANGED
|
@@ -2,6 +2,7 @@ from langchain.document_loaders import PyPDFLoader, TextLoader
|
|
| 2 |
from langchain.chains.summarize import load_summarize_chain
|
| 3 |
from langchain.chat_models import ChatOpenAI
|
| 4 |
from langchain.docstore.document import Document
|
|
|
|
| 5 |
from typing import Dict, List
|
| 6 |
|
| 7 |
|
|
@@ -49,3 +50,46 @@ def summarize(
|
|
| 49 |
)
|
| 50 |
summary = chain.run(docs)
|
| 51 |
return summary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from langchain.chains.summarize import load_summarize_chain
|
| 3 |
from langchain.chat_models import ChatOpenAI
|
| 4 |
from langchain.docstore.document import Document
|
| 5 |
+
from src.prompts import prompts
|
| 6 |
from typing import Dict, List
|
| 7 |
|
| 8 |
|
|
|
|
| 50 |
)
|
| 51 |
summary = chain.run(docs)
|
| 52 |
return summary
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def summarize_wrapper(
|
| 56 |
+
file: str, llm: ChatOpenAI, summarization_type: str, summarization_kwargs: dict
|
| 57 |
+
) -> str:
|
| 58 |
+
"""Wrapper for the summarization function to make it compatible with gradio.
|
| 59 |
+
|
| 60 |
+
Args:
|
| 61 |
+
file (str): Path to the file. This can either be a local path or a tempfile._TemporaryFileWrapper.
|
| 62 |
+
llm (ChatOpenAI): Language model.
|
| 63 |
+
summarization_type (str): Type of summarization. Can be either "short", "middle" or "long".
|
| 64 |
+
summarization_kwargs (dict): Keyword arguments for the summarization.
|
| 65 |
+
|
| 66 |
+
Returns:
|
| 67 |
+
str: Summarization of the file.
|
| 68 |
+
"""
|
| 69 |
+
if summarization_type == "short":
|
| 70 |
+
summarization_kwargs.update(
|
| 71 |
+
dict(
|
| 72 |
+
map_prompt=prompts["short_de"]["map_prompt"],
|
| 73 |
+
combine_prompt=prompts["short_de"]["combine_prompt"],
|
| 74 |
+
)
|
| 75 |
+
)
|
| 76 |
+
elif summarization_type == "middle":
|
| 77 |
+
summarization_kwargs.update(
|
| 78 |
+
dict(
|
| 79 |
+
map_prompt=prompts["middle_de"]["map_prompt"],
|
| 80 |
+
combine_prompt=prompts["middle_de"]["combine_prompt"],
|
| 81 |
+
)
|
| 82 |
+
)
|
| 83 |
+
elif summarization_type == "long":
|
| 84 |
+
summarization_kwargs.update(
|
| 85 |
+
dict(
|
| 86 |
+
map_prompt=prompts["long_de"]["map_prompt"],
|
| 87 |
+
combine_prompt=prompts["long_de"]["combine_prompt"],
|
| 88 |
+
)
|
| 89 |
+
)
|
| 90 |
+
else:
|
| 91 |
+
raise ValueError(f"Summarization type {summarization_type} is not supported.")
|
| 92 |
+
|
| 93 |
+
return summarize(
|
| 94 |
+
file_path=file.name, llm=llm[0], summarization_kwargs=summarization_kwargs
|
| 95 |
+
)
|