Upload folder using huggingface_hub
Browse files
- configuration/deployment.json +1 -1
- configuration/example.json +1 -1
- src/__pycache__/gradio_app.cpython-39.pyc +0 -0
- src/__pycache__/prompts.cpython-39.pyc +0 -0
- src/__pycache__/summarization.cpython-39.pyc +0 -0
- src/gradio_app.py +32 -2
- src/prompts.py +112 -0
- src/summarization.py +120 -9
configuration/deployment.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"language_model_kwargs": {
|
| 3 |
-
"model_name": "gpt-
|
| 4 |
"temperature": 0.0
|
| 5 |
},
|
| 6 |
"summarization_kwargs": {
|
|
|
|
| 1 |
{
|
| 2 |
"language_model_kwargs": {
|
| 3 |
+
"model_name": "gpt-3.5-turbo-16k",
|
| 4 |
"temperature": 0.0
|
| 5 |
},
|
| 6 |
"summarization_kwargs": {
|
configuration/example.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"language_model_kwargs": {
|
| 3 |
-
"model_name": "gpt-3.5-turbo",
|
| 4 |
"temperature": 0.0
|
| 5 |
},
|
| 6 |
"summarization_kwargs": {
|
|
|
|
| 1 |
{
|
| 2 |
"language_model_kwargs": {
|
| 3 |
+
"model_name": "gpt-3.5-turbo-16k",
|
| 4 |
"temperature": 0.0
|
| 5 |
},
|
| 6 |
"summarization_kwargs": {
|
src/__pycache__/gradio_app.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/gradio_app.cpython-39.pyc and b/src/__pycache__/gradio_app.cpython-39.pyc differ
|
|
|
src/__pycache__/prompts.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/prompts.cpython-39.pyc and b/src/__pycache__/prompts.cpython-39.pyc differ
|
|
|
src/__pycache__/summarization.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/summarization.cpython-39.pyc and b/src/__pycache__/summarization.cpython-39.pyc differ
|
|
|
src/gradio_app.py
CHANGED
|
@@ -4,7 +4,7 @@ import pypdfium2 as pdfium
|
|
| 4 |
import gradio as gr
|
| 5 |
|
| 6 |
from langchain.chat_models import ChatOpenAI
|
| 7 |
-
from src.summarization import summarize_wrapper
|
| 8 |
from src.mailing import send_email
|
| 9 |
|
| 10 |
# Function to render a specific page of a PDF file as an image
|
|
@@ -79,6 +79,7 @@ def run_summarization_model_gradio(
|
|
| 79 |
summary_short = gr.Button("Kurze Zusammenfassung", interactive=False)
|
| 80 |
summary_middle = gr.Button("Mittlere Zusammenfassung", interactive=False)
|
| 81 |
summary_long = gr.Button("Lange Zusammenfassung", interactive=False)
|
|
|
|
| 82 |
with gr.Row().style(equal_height=True):
|
| 83 |
with gr.Column(scale=1):
|
| 84 |
summary_output = gr.Textbox(label="Zusammenfassung", lines=9).style(
|
|
@@ -114,7 +115,14 @@ def run_summarization_model_gradio(
|
|
| 114 |
[gr.State(True)],
|
| 115 |
[summary_short, summary_middle, summary_long],
|
| 116 |
queue=False,
|
| 117 |
-
).then(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
# If you click any button, first disable all buttons, then summarize and then enable the clicked button
|
| 120 |
for s, summarization_type in [
|
|
@@ -149,6 +157,28 @@ def run_summarization_model_gradio(
|
|
| 149 |
queue=False,
|
| 150 |
)
|
| 151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
# The clear button clears the dashboard
|
| 153 |
clear.click(lambda: None, None, summary_output, queue=False).then(
|
| 154 |
lambda: None, None, file_upload, queue=False
|
|
|
|
| 4 |
import gradio as gr
|
| 5 |
|
| 6 |
from langchain.chat_models import ChatOpenAI
|
| 7 |
+
from src.summarization import summarize_wrapper, parallel_summarization
|
| 8 |
from src.mailing import send_email
|
| 9 |
|
| 10 |
# Function to render a specific page of a PDF file as an image
|
|
|
|
| 79 |
summary_short = gr.Button("Kurze Zusammenfassung", interactive=False)
|
| 80 |
summary_middle = gr.Button("Mittlere Zusammenfassung", interactive=False)
|
| 81 |
summary_long = gr.Button("Lange Zusammenfassung", interactive=False)
|
| 82 |
+
summary_parallel = gr.Button("Parallele Zusammenfassung", interactive=False)
|
| 83 |
with gr.Row().style(equal_height=True):
|
| 84 |
with gr.Column(scale=1):
|
| 85 |
summary_output = gr.Textbox(label="Zusammenfassung", lines=9).style(
|
|
|
|
| 115 |
[gr.State(True)],
|
| 116 |
[summary_short, summary_middle, summary_long],
|
| 117 |
queue=False,
|
| 118 |
+
).then(
|
| 119 |
+
switch_buttons,
|
| 120 |
+
[gr.State(True)],
|
| 121 |
+
[summary_parallel, gr.State(None), gr.State(None)],
|
| 122 |
+
queue=False,
|
| 123 |
+
).then(
|
| 124 |
+
fn=render_file, inputs=[file_upload], outputs=[show_pdf]
|
| 125 |
+
)
|
| 126 |
|
| 127 |
# If you click any button, first disable all buttons, then summarize and then enable the clicked button
|
| 128 |
for s, summarization_type in [
|
|
|
|
| 157 |
queue=False,
|
| 158 |
)
|
| 159 |
|
| 160 |
+
summary_parallel.click(
|
| 161 |
+
switch_buttons,
|
| 162 |
+
[gr.State(False)],
|
| 163 |
+
[summary_short, summary_middle, summary_long],
|
| 164 |
+
queue=False,
|
| 165 |
+
).then(
|
| 166 |
+
parallel_summarization,
|
| 167 |
+
[file_upload, gr.State([llm]), gr.State(summarization_kwargs)],
|
| 168 |
+
[summary_output],
|
| 169 |
+
queue=False,
|
| 170 |
+
).then(
|
| 171 |
+
switch_buttons,
|
| 172 |
+
[gr.State(True)],
|
| 173 |
+
[summary_short, summary_middle, summary_long],
|
| 174 |
+
queue=False,
|
| 175 |
+
).then(
|
| 176 |
+
switch_buttons,
|
| 177 |
+
[gr.State(True)],
|
| 178 |
+
[send_email_button, gr.State(None), gr.State(None)],
|
| 179 |
+
queue=False,
|
| 180 |
+
)
|
| 181 |
+
|
| 182 |
# The clear button clears the dashboard
|
| 183 |
clear.click(lambda: None, None, summary_output, queue=False).then(
|
| 184 |
lambda: None, None, file_upload, queue=False
|
src/prompts.py
CHANGED
|
@@ -150,3 +150,115 @@ Die Teile der Zusammenfassung mit Angabe der Seitenzahlen:
|
|
| 150 |
),
|
| 151 |
},
|
| 152 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
),
|
| 151 |
},
|
| 152 |
}
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def get_template_mp(name: str, headline: str, additional_text: str = ""):
|
| 156 |
+
base_multi = (
|
| 157 |
+
"Schreibe, ein/e <KEY> des Urteils, das durch dreifache Anführungszeichen begrenzt ist, in maximal einem Paragraphen.\n"
|
| 158 |
+
"<ADDITIONAL_TEXT>\n"
|
| 159 |
+
'Als Überschrift muss "<HEAD_LINE>" angegeben werden. \n'
|
| 160 |
+
# "Nach dem Paragraph müssen die Seiten angegeben werden die genutzt wurden."
|
| 161 |
+
"Urteil:\n"
|
| 162 |
+
"```{text}```\n"
|
| 163 |
+
"\n"
|
| 164 |
+
"\n\nText:\n"
|
| 165 |
+
)
|
| 166 |
+
return (
|
| 167 |
+
base_multi.replace("<KEY>", name)
|
| 168 |
+
.replace("<HEAD_LINE>", headline)
|
| 169 |
+
.replace("<ADDITIONAL_TEXT>", additional_text)
|
| 170 |
+
)
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
prompts_parallel = {
|
| 174 |
+
"intro": PromptTemplate(
|
| 175 |
+
input_variables=["text"],
|
| 176 |
+
template=get_template_mp(name="Einleitung", headline="I. Einleitung"),
|
| 177 |
+
),
|
| 178 |
+
"darstellung_des_rechtsproblems": PromptTemplate(
|
| 179 |
+
input_variables=["text"],
|
| 180 |
+
template=get_template_mp(
|
| 181 |
+
name="Darstellung des Rechtsproblems",
|
| 182 |
+
headline="Darstellung des Rechtsproblems",
|
| 183 |
+
),
|
| 184 |
+
),
|
| 185 |
+
"angaben_ueber_das_urteil": PromptTemplate(
|
| 186 |
+
input_variables=["text"],
|
| 187 |
+
template=get_template_mp(
|
| 188 |
+
name="Angaben über das Urteil",
|
| 189 |
+
headline="Angaben über das Urteil",
|
| 190 |
+
additional_text="Gib die folgenden Informationen an: Gericht, Datum, Aktenzeichen (AZ: ...), Fundstelle(n)",
|
| 191 |
+
),
|
| 192 |
+
),
|
| 193 |
+
"sachverhalt": PromptTemplate(
|
| 194 |
+
input_variables=["text"],
|
| 195 |
+
template=get_template_mp(
|
| 196 |
+
name="Sachverhalt",
|
| 197 |
+
headline="Sachverhalt (unter Rückgriff auf Instanzentscheidung)",
|
| 198 |
+
additional_text=(
|
| 199 |
+
"Beziehe dich auf die Instanzentscheidung.\n"
|
| 200 |
+
"Es soll nur der Sachverhalt des Urteils wiedergegeben werden. Wenn das Urteil keinen Sachverhalt hat schreib: 'Keine Informationen zum Sachverhalt vorhanden'."
|
| 201 |
+
),
|
| 202 |
+
),
|
| 203 |
+
),
|
| 204 |
+
"prozessgeschichte": PromptTemplate(
|
| 205 |
+
input_variables=["text"],
|
| 206 |
+
template=get_template_mp(
|
| 207 |
+
name="Prozessgeschichte", headline="3. Prozessgeschichte"
|
| 208 |
+
),
|
| 209 |
+
),
|
| 210 |
+
"rechtsproblem": PromptTemplate(
|
| 211 |
+
input_variables=["text"],
|
| 212 |
+
template=get_template_mp(
|
| 213 |
+
name="Rechtsproblem",
|
| 214 |
+
headline="Rechtsproblem",
|
| 215 |
+
additional_text="Das Problem des Falles ist genau herauszuarbeiten und im rechtlichen Kontext zu verankern.",
|
| 216 |
+
),
|
| 217 |
+
),
|
| 218 |
+
"loesung_des_gerichts": PromptTemplate(
|
| 219 |
+
input_variables=["text"],
|
| 220 |
+
template=get_template_mp(
|
| 221 |
+
name="Lösung des Gerichts", headline="Lösung des Gerichts"
|
| 222 |
+
),
|
| 223 |
+
),
|
| 224 |
+
"loesungsansaetze_zum_problem": PromptTemplate(
|
| 225 |
+
input_variables=["text"],
|
| 226 |
+
template=get_template_mp(
|
| 227 |
+
name="Lösungsansätze zum Problem",
|
| 228 |
+
headline="Lösungsansätze zum Problem",
|
| 229 |
+
additional_text="Knappe, aber möglichst vollständige Übersicht der vertretenen Ansichten bzw. der Lösungsvorschläge im Urteil.",
|
| 230 |
+
),
|
| 231 |
+
),
|
| 232 |
+
"analyse_und_einordnung_der_entscheidung": PromptTemplate(
|
| 233 |
+
input_variables=["text"],
|
| 234 |
+
template=get_template_mp(
|
| 235 |
+
name="Analyse und Einordnung der Entscheidung",
|
| 236 |
+
headline="Analyse und Einordnung der Entscheidung",
|
| 237 |
+
additional_text="Es soll nur der Inhalt des Urteils wiedergegeben werden.",
|
| 238 |
+
),
|
| 239 |
+
),
|
| 240 |
+
"bewertung_und_kritik_der_entscheidung": PromptTemplate(
|
| 241 |
+
input_variables=["text"],
|
| 242 |
+
template=get_template_mp(
|
| 243 |
+
name="Bewertung und Kritik der Entscheidung",
|
| 244 |
+
headline="Bewertung und Kritik der Entscheidung",
|
| 245 |
+
additional_text="Verwende ausschließlich den Kontext des Urteils und schreib keinen neuen Text. Wenn keine Bewertung oder Kritik vorhanden ist, antworte mit 'Keine Bewertung oder Kritik vorhanden.'",
|
| 246 |
+
),
|
| 247 |
+
),
|
| 248 |
+
"eigener_loesungsvorschlag": PromptTemplate(
|
| 249 |
+
input_variables=["text"],
|
| 250 |
+
template=get_template_mp(
|
| 251 |
+
name="Eigener Lösungsvorschlag",
|
| 252 |
+
headline="Eigener Lösungsvorschlag",
|
| 253 |
+
additional_text="Es soll nur der Inhalt des Urteils wiedergegeben werden. Wenn das Urteil keinen eigenen Lösungsvorschlag hat schreib: 'Keine Informationen zum eigenen Lösungsvorschlag vorhanden'",
|
| 254 |
+
),
|
| 255 |
+
),
|
| 256 |
+
"ausblick": PromptTemplate(
|
| 257 |
+
input_variables=["text"],
|
| 258 |
+
template=get_template_mp(
|
| 259 |
+
name="Ausblick",
|
| 260 |
+
headline="Ausblick",
|
| 261 |
+
additional_text="Es soll nur der Inhalt des Urteils wiedergegeben werden. Wenn das Urteil keinen Ausblick gibt schreib: 'Keine Informationen zum Ausblick vorhanden'.",
|
| 262 |
+
),
|
| 263 |
+
),
|
| 264 |
+
}
|
src/summarization.py
CHANGED
|
@@ -4,15 +4,18 @@ from langchain.chains.llm import LLMChain
|
|
| 4 |
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
|
| 5 |
from langchain.chat_models import ChatOpenAI
|
| 6 |
from langchain.docstore.document import Document
|
| 7 |
-
from src.prompts import prompts
|
|
|
|
| 8 |
from typing import Dict, List
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
-
def load_docs(file_path: str) -> List[Document]:
|
| 12 |
"""Load a file and return the text.
|
| 13 |
|
| 14 |
Args:
|
| 15 |
file_path (str): Path to the pdf file. This can either be a local path or a tempfile.TemporaryFileWrapper_.
|
|
|
|
| 16 |
|
| 17 |
Raises:
|
| 18 |
ValueError: If the file type is not supported.
|
|
@@ -33,17 +36,15 @@ def load_docs(file_path: str) -> List[Document]:
|
|
| 33 |
for doc in docs:
|
| 34 |
doc.page_content = doc.page_content.replace("\n", " \n ")
|
| 35 |
# if doc contains a page append it to the text
|
| 36 |
-
if hasattr(doc, "metadata"):
|
| 37 |
-
doc.page_content = (
|
| 38 |
-
|
| 39 |
-
+ doc.page_content
|
| 40 |
-
+ f" \n Ende Seite {doc.metadata.get('page')+1}"
|
| 41 |
)
|
| 42 |
|
| 43 |
return docs
|
| 44 |
|
| 45 |
|
| 46 |
-
def
|
| 47 |
file_path: str, llm: ChatOpenAI, summarization_kwargs: Dict[str, str]
|
| 48 |
) -> str:
|
| 49 |
"""Summarize a pdf file. The summarization is done by the language model.
|
|
@@ -109,6 +110,116 @@ def summarize_wrapper(
|
|
| 109 |
else:
|
| 110 |
raise ValueError(f"Summarization type {summarization_type} is not supported.")
|
| 111 |
|
| 112 |
-
return
|
| 113 |
file_path=file.name, llm=llm[0], summarization_kwargs=summarization_kwargs
|
| 114 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
|
| 5 |
from langchain.chat_models import ChatOpenAI
|
| 6 |
from langchain.docstore.document import Document
|
| 7 |
+
from src.prompts import prompts, prompts_parallel
|
| 8 |
+
import time
|
| 9 |
from typing import Dict, List
|
| 10 |
+
import asyncio
|
| 11 |
|
| 12 |
|
| 13 |
+
def load_docs(file_path: str, with_pageinfo: bool = True) -> List[Document]:
|
| 14 |
"""Load a file and return the text.
|
| 15 |
|
| 16 |
Args:
|
| 17 |
file_path (str): Path to the pdf file. This can either be a local path or a tempfile.TemporaryFileWrapper_.
|
| 18 |
+
with_pageinfo (bool, optional): If True the page information is added to the document. Defaults to True.
|
| 19 |
|
| 20 |
Raises:
|
| 21 |
ValueError: If the file type is not supported.
|
|
|
|
| 36 |
for doc in docs:
|
| 37 |
doc.page_content = doc.page_content.replace("\n", " \n ")
|
| 38 |
# if doc contains a page append it to the text
|
| 39 |
+
if with_pageinfo and hasattr(doc, "metadata"):
|
| 40 |
+
doc.page_content = f"(Quelle Seite: {doc.metadata.get('page')+1}) .".join(
|
| 41 |
+
doc.page_content.split(" .")
|
|
|
|
|
|
|
| 42 |
)
|
| 43 |
|
| 44 |
return docs
|
| 45 |
|
| 46 |
|
| 47 |
+
def summarize_chain(
|
| 48 |
file_path: str, llm: ChatOpenAI, summarization_kwargs: Dict[str, str]
|
| 49 |
) -> str:
|
| 50 |
"""Summarize a pdf file. The summarization is done by the language model.
|
|
|
|
| 110 |
else:
|
| 111 |
raise ValueError(f"Summarization type {summarization_type} is not supported.")
|
| 112 |
|
| 113 |
+
return summarize_chain(
|
| 114 |
file_path=file.name, llm=llm[0], summarization_kwargs=summarization_kwargs
|
| 115 |
)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
async def async_generate(
|
| 119 |
+
llm: ChatOpenAI, docs: List[Document], summarization_kwargs: dict, k: str
|
| 120 |
+
) -> dict:
|
| 121 |
+
"""Asynchronous summarization.
|
| 122 |
+
|
| 123 |
+
Args:
|
| 124 |
+
llm (ChatOpenAI): Language model to use for the summarization.
|
| 125 |
+
docs (List[Document]): List of documents.
|
| 126 |
+
summarization_kwargs (dict): Keyword arguments for the summarization.
|
| 127 |
+
k (str): Key for the summarization.
|
| 128 |
+
|
| 129 |
+
Returns:
|
| 130 |
+
dict: Dictionary with the summarization.
|
| 131 |
+
"""
|
| 132 |
+
print(f"Starting summarization for {k}")
|
| 133 |
+
now = time.time()
|
| 134 |
+
# chain = load_summarize_chain(llm=llm, **summarization_kwargs)
|
| 135 |
+
chain = LLMChain(llm=llm, **summarization_kwargs)
|
| 136 |
+
resp = await chain.arun(text=docs)
|
| 137 |
+
print(f"Time taken for {k}: ", time.time() - now)
|
| 138 |
+
return {k: resp}
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
async def generate_concurrently(file_path: str, llm: ChatOpenAI) -> List[dict]:
|
| 142 |
+
"""Parallel summarization.
|
| 143 |
+
|
| 144 |
+
Args:
|
| 145 |
+
file_path (str): Path to the pdf file. This can either be a local path or a tempfile.TemporaryFileWrapper_.
|
| 146 |
+
llm (ChatOpenAI): Language model to use for the summarization.
|
| 147 |
+
|
| 148 |
+
Returns:
|
| 149 |
+
List: List of summarizations.
|
| 150 |
+
"""
|
| 151 |
+
|
| 152 |
+
docs = load_docs(file_path=file_path, with_pageinfo=False)
|
| 153 |
+
summarization_kwargs = dict()
|
| 154 |
+
|
| 155 |
+
# create parallel tasks
|
| 156 |
+
tasks = []
|
| 157 |
+
i = 0
|
| 158 |
+
for k, pt in prompts_parallel.items():
|
| 159 |
+
sk = summarization_kwargs.copy()
|
| 160 |
+
sk["prompt"] = pt
|
| 161 |
+
print(f"Appending task for {k}")
|
| 162 |
+
tasks.append(async_generate(llm=llm, docs=docs, summarization_kwargs=sk, k=k))
|
| 163 |
+
print("-------------------")
|
| 164 |
+
# execute all coroutines concurrently
|
| 165 |
+
values = await asyncio.gather(*tasks)
|
| 166 |
+
|
| 167 |
+
# report return values
|
| 168 |
+
values_flattened = {}
|
| 169 |
+
for v in values:
|
| 170 |
+
values_flattened.update(v)
|
| 171 |
+
return values_flattened
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def parallel_summarization(
|
| 175 |
+
file: str, llm: ChatOpenAI, summarization_kwargs: dict
|
| 176 |
+
) -> str:
|
| 177 |
+
"""Wrapper for the summarization function to make it compatible with gradio.
|
| 178 |
+
|
| 179 |
+
Args:
|
| 180 |
+
file (str): Path to the file. This can either be a local path or a tempfile.TemporaryFileWrapper_.
|
| 181 |
+
llm (ChatOpenAI): Language model.
|
| 182 |
+
summarization_kwargs (dict): Keyword arguments for the summarization.
|
| 183 |
+
|
| 184 |
+
Returns:
|
| 185 |
+
str: Summarization of the file.
|
| 186 |
+
"""
|
| 187 |
+
now = time.time()
|
| 188 |
+
values_flattened = asyncio.run(
|
| 189 |
+
generate_concurrently(file_path=file.name, llm=llm[0])
|
| 190 |
+
)
|
| 191 |
+
print("Time taken: ", time.time() - now)
|
| 192 |
+
|
| 193 |
+
output = f"""
|
| 194 |
+
|
| 195 |
+
{values_flattened["intro"]}
|
| 196 |
+
|
| 197 |
+
{values_flattened["darstellung_des_rechtsproblems"]}
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
II. Die Entscheidung
|
| 201 |
+
|
| 202 |
+
{values_flattened["angaben_ueber_das_urteil"]}
|
| 203 |
+
|
| 204 |
+
{values_flattened["sachverhalt"]}
|
| 205 |
+
|
| 206 |
+
{values_flattened["prozessgeschichte"]}
|
| 207 |
+
|
| 208 |
+
{values_flattened["rechtsproblem"]}
|
| 209 |
+
|
| 210 |
+
{values_flattened["loesung_des_gerichts"]}
|
| 211 |
+
|
| 212 |
+
III. Analyse
|
| 213 |
+
|
| 214 |
+
{values_flattened["loesungsansaetze_zum_problem"]}
|
| 215 |
+
|
| 216 |
+
{values_flattened["analyse_und_einordnung_der_entscheidung"]}
|
| 217 |
+
|
| 218 |
+
{values_flattened["bewertung_und_kritik_der_entscheidung"]}
|
| 219 |
+
|
| 220 |
+
{values_flattened["eigener_loesungsvorschlag"]}
|
| 221 |
+
|
| 222 |
+
{values_flattened["ausblick"]}
|
| 223 |
+
"""
|
| 224 |
+
|
| 225 |
+
return output
|