fvde committed on
Commit
51fbfa6
·
1 Parent(s): f393851

Upload folder using huggingface_hub

Browse files
requirements.txt CHANGED
@@ -9,4 +9,5 @@ grpcio-tools==1.54.2
9
  gpt_index==0.4.24
10
  langchain==0.0.190
11
  environs==9.5.0
12
- pypdf==3.9.1
 
 
9
  gpt_index==0.4.24
10
  langchain==0.0.190
11
  environs==9.5.0
12
+ pypdf==3.9.1
13
+ pypdfium2==4.18.0
src/__pycache__/gradio_app.cpython-39.pyc CHANGED
Binary files a/src/__pycache__/gradio_app.cpython-39.pyc and b/src/__pycache__/gradio_app.cpython-39.pyc differ
 
src/__pycache__/summarization.cpython-39.pyc CHANGED
Binary files a/src/__pycache__/summarization.cpython-39.pyc and b/src/__pycache__/summarization.cpython-39.pyc differ
 
src/gradio_app.py CHANGED
@@ -1,9 +1,32 @@
1
  import gradio as gr
2
  from langchain.chat_models import ChatOpenAI
3
- from src.summarization import summarize
4
- from src.prompts import prompts
5
  import os
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  def switch_buttons(interactive: bool):
9
  """This switches the buttons to interactive or not interactive.
@@ -24,49 +47,6 @@ def switch_buttons(interactive: bool):
24
  )
25
 
26
 
27
- def summarize_wrapper(
28
- file: str, llm: ChatOpenAI, summarization_type: str, summarization_kwargs: dict
29
- ) -> str:
30
- """Wrapper for the summarization function to make it compatible with gradio.
31
-
32
- Args:
33
- file (str): Path to the file. This can either be a local path or a tempfile._TemporaryFileWrapper (whose ``.name`` attribute is used as the path).
34
- llm (ChatOpenAI): Language model.
35
- summarization_type (str): Type of summarization. Can be either "short", "middle" or "long".
36
- summarization_kwargs (dict): Keyword arguments for the summarization.
37
-
38
- Returns:
39
- str: Summarization of the file.
40
- """
41
- if summarization_type == "short":
42
- summarization_kwargs.update(
43
- dict(
44
- map_prompt=prompts["short_de"]["map_prompt"],
45
- combine_prompt=prompts["short_de"]["combine_prompt"],
46
- )
47
- )
48
- elif summarization_type == "middle":
49
- summarization_kwargs.update(
50
- dict(
51
- map_prompt=prompts["middle_de"]["map_prompt"],
52
- combine_prompt=prompts["middle_de"]["combine_prompt"],
53
- )
54
- )
55
- elif summarization_type == "long":
56
- summarization_kwargs.update(
57
- dict(
58
- map_prompt=prompts["long_de"]["map_prompt"],
59
- combine_prompt=prompts["long_de"]["combine_prompt"],
60
- )
61
- )
62
- else:
63
- raise ValueError(f"Summarization type {summarization_type} is not supported.")
64
-
65
- return summarize(
66
- file_path=file.name, llm=llm[0], summarization_kwargs=summarization_kwargs
67
- )
68
-
69
-
70
  def run_summarization_model_gradio(
71
  llm: ChatOpenAI,
72
  share_gradio_via_link: bool = False,
@@ -91,29 +71,36 @@ def run_summarization_model_gradio(
91
  ) as webui:
92
  with gr.Row().style(equal_height=True):
93
  Header_box = generate_title(title=title, description=description)
 
 
 
 
 
 
 
94
  with gr.Row().style(equal_height=True):
95
  summary_short = gr.Button("Kurze Zusammenfassung", interactive=False)
96
  summary_middle = gr.Button("Mittlere Zusammenfassung", interactive=False)
97
  summary_long = gr.Button("Lange Zusammenfassung", interactive=False)
98
  with gr.Row().style(equal_height=True):
99
  with gr.Column(scale=1):
100
- file_upload = gr.File(
101
- file_count="single",
102
- file_types=[".pdf", ".txt"],
103
- label="Upload PDF",
104
  )
105
- with gr.Column(scale=4):
106
- summary_output = gr.Textbox(label="Zusammenfassung").style(
107
- show_copy_button=True
108
  )
109
 
110
- # Once a file is uploaded, enable the summarization buttons
111
  file_upload.upload(
112
  switch_buttons,
113
  [gr.State(True)],
114
  [summary_short, summary_middle, summary_long],
115
  queue=False,
116
- )
117
 
118
  # If you click any button, first disable all buttons, then summarize, and then enable the clicked button
119
  for s, summarization_type in [
@@ -144,7 +131,6 @@ def run_summarization_model_gradio(
144
  )
145
 
146
  # The clear button clears the dashboard
147
- clear = gr.Button("Clear")
148
  clear.click(lambda: None, None, summary_output, queue=False).then(
149
  lambda: None, None, file_upload, queue=False
150
  ).then(
@@ -167,6 +153,7 @@ def run_summarization_model_gradio(
167
 
168
 
169
  def generate_title(title: str, description: str):
 
170
  return gr.HTML(
171
  (
172
  """
 
1
  import gradio as gr
2
  from langchain.chat_models import ChatOpenAI
3
+ from src.summarization import summarize_wrapper
 
4
  import os
5
 
6
+ import pypdfium2 as pdfium
7
+ from PIL import Image
8
+
9
+ # Render the first page (index 0) of the uploaded PDF file as a PIL image
10
+ def render_file(file):
11
+ pdf = pdfium.PdfDocument(file.name)
12
+
13
+ page = pdf.get_page(0)
14
+ bitmap = page.render(
15
+ scale=300 / 72, # 300dpi resolution
16
+ rotation=0, # no additional rotation
17
+ # ... further rendering options
18
+ )
19
+ pil_image = bitmap.to_pil()
20
+
21
+ # WORK FROM HERE TO RENDER THE COMPLETE PDF
22
+ # bitmap = pdf.render(
23
+ # pdfium.PdfBitmap.to_pil,
24
+ # page_indices=[0, 1, 2],
25
+ # scale=300 / 72, # 300dpi resolution
26
+ # )
27
+ # pil_image = bitmap.to_pil()
28
+ return pil_image
29
+
30
 
31
  def switch_buttons(interactive: bool):
32
  """This switches the buttons to interactive or not interactive.
 
47
  )
48
 
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  def run_summarization_model_gradio(
51
  llm: ChatOpenAI,
52
  share_gradio_via_link: bool = False,
 
71
  ) as webui:
72
  with gr.Row().style(equal_height=True):
73
  Header_box = generate_title(title=title, description=description)
74
+ with gr.Row().style(equal_height=True):
75
+ clear = gr.Button("Clear")
76
+ file_upload = gr.File(
77
+ file_count="single",
78
+ file_types=[".pdf", ".txt"],
79
+ label="Upload PDF",
80
+ )
81
  with gr.Row().style(equal_height=True):
82
  summary_short = gr.Button("Kurze Zusammenfassung", interactive=False)
83
  summary_middle = gr.Button("Mittlere Zusammenfassung", interactive=False)
84
  summary_long = gr.Button("Lange Zusammenfassung", interactive=False)
85
  with gr.Row().style(equal_height=True):
86
  with gr.Column(scale=1):
87
+ summary_output = (
88
+ gr.Textbox(label="Zusammenfassung")
89
+ .style(show_copy_button=True)
90
+ .style(height=680)
91
  )
92
+ with gr.Column(scale=1):
93
+ show_img = gr.Image(label="Uploaded PDF", tool="select").style(
94
+ height=680
95
  )
96
 
97
+ # Once a file is uploaded, enable the summarization buttons and visualize the uploaded file
98
  file_upload.upload(
99
  switch_buttons,
100
  [gr.State(True)],
101
  [summary_short, summary_middle, summary_long],
102
  queue=False,
103
+ ).then(fn=render_file, inputs=[file_upload], outputs=[show_img])
104
 
105
  # If you click any button, first disable all buttons, then summarize, and then enable the clicked button
106
  for s, summarization_type in [
 
131
  )
132
 
133
  # The clear button clears the dashboard
 
134
  clear.click(lambda: None, None, summary_output, queue=False).then(
135
  lambda: None, None, file_upload, queue=False
136
  ).then(
 
153
 
154
 
155
  def generate_title(title: str, description: str):
156
+ """THIS IS AN EXAMPLE HTML CODE FOR A TITLE"""
157
  return gr.HTML(
158
  (
159
  """
src/mailing.py ADDED
File without changes
src/summarization.py CHANGED
@@ -2,6 +2,7 @@ from langchain.document_loaders import PyPDFLoader, TextLoader
2
  from langchain.chains.summarize import load_summarize_chain
3
  from langchain.chat_models import ChatOpenAI
4
  from langchain.docstore.document import Document
 
5
  from typing import Dict, List
6
 
7
 
@@ -49,3 +50,46 @@ def summarize(
49
  )
50
  summary = chain.run(docs)
51
  return summary
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from langchain.chains.summarize import load_summarize_chain
3
  from langchain.chat_models import ChatOpenAI
4
  from langchain.docstore.document import Document
5
+ from src.prompts import prompts
6
  from typing import Dict, List
7
 
8
 
 
50
  )
51
  summary = chain.run(docs)
52
  return summary
53
+
54
+
55
+ def summarize_wrapper(
56
+ file: str, llm: ChatOpenAI, summarization_type: str, summarization_kwargs: dict
57
+ ) -> str:
58
+ """Wrapper for the summarization function to make it compatible with gradio.
59
+
60
+ Args:
61
+ file (str): Path to the file. This can either be a local path or a tempfile._TemporaryFileWrapper (whose ``.name`` attribute is used as the path).
62
+ llm (ChatOpenAI): Language model.
63
+ summarization_type (str): Type of summarization. Can be either "short", "middle" or "long".
64
+ summarization_kwargs (dict): Keyword arguments for the summarization.
65
+
66
+ Returns:
67
+ str: Summarization of the file.
68
+ """
69
+ if summarization_type == "short":
70
+ summarization_kwargs.update(
71
+ dict(
72
+ map_prompt=prompts["short_de"]["map_prompt"],
73
+ combine_prompt=prompts["short_de"]["combine_prompt"],
74
+ )
75
+ )
76
+ elif summarization_type == "middle":
77
+ summarization_kwargs.update(
78
+ dict(
79
+ map_prompt=prompts["middle_de"]["map_prompt"],
80
+ combine_prompt=prompts["middle_de"]["combine_prompt"],
81
+ )
82
+ )
83
+ elif summarization_type == "long":
84
+ summarization_kwargs.update(
85
+ dict(
86
+ map_prompt=prompts["long_de"]["map_prompt"],
87
+ combine_prompt=prompts["long_de"]["combine_prompt"],
88
+ )
89
+ )
90
+ else:
91
+ raise ValueError(f"Summarization type {summarization_type} is not supported.")
92
+
93
+ return summarize(
94
+ file_path=file.name, llm=llm[0], summarization_kwargs=summarization_kwargs
95
+ )