fvde committed on
Commit
c6d3d04
·
1 Parent(s): 2f12302

Upload folder using huggingface_hub

Browse files
app.py CHANGED
@@ -19,19 +19,13 @@ if __name__ == "__main__":
19
  args = parser.parse_args()
20
 
21
  # Default configuration for summarization
22
- summarization_default_kwargs = dict(
23
- chain_type="map_reduce",
24
- map_prompt=prompts["short_de"]["map_prompt"],
25
- combine_prompt=prompts["short_de"]["combine_prompt"],
26
- )
27
-
28
  # Load configuration from a configuration file
29
  with open(args.configuration_file, "r") as cgf:
30
  cgf_kwargs = json.load(cgf)
31
- summarization_kwargs = cgf_kwargs.get("summarization_kwargs", {})
32
 
33
- # Update the configuration with the command line arguments
34
- summarization_default_kwargs.update(summarization_kwargs)
 
35
 
36
  # Load the language model
37
  llm = load_open_ai_llm(
@@ -42,6 +36,6 @@ if __name__ == "__main__":
42
  run_summarization_model_gradio(
43
  llm=llm,
44
  share_gradio_via_link=cgf_kwargs.get("share_gradio_via_link", False),
45
- summarization_kwargs=summarization_default_kwargs,
46
  run_local=cgf_kwargs.get("run_local", True),
47
  )
 
19
  args = parser.parse_args()
20
 
21
  # Default configuration for summarization
 
 
 
 
 
 
22
  # Load configuration from a configuration file
23
  with open(args.configuration_file, "r") as cgf:
24
  cgf_kwargs = json.load(cgf)
 
25
 
26
+ summarization_kwargs = cgf_kwargs.get(
27
+ "summarization_kwargs", {"chain_type": "map_reduce"}
28
+ )
29
 
30
  # Load the language model
31
  llm = load_open_ai_llm(
 
36
  run_summarization_model_gradio(
37
  llm=llm,
38
  share_gradio_via_link=cgf_kwargs.get("share_gradio_via_link", False),
39
+ summarization_kwargs=summarization_kwargs,
40
  run_local=cgf_kwargs.get("run_local", True),
41
  )
src/__pycache__/gradio_app.cpython-39.pyc CHANGED
Binary files a/src/__pycache__/gradio_app.cpython-39.pyc and b/src/__pycache__/gradio_app.cpython-39.pyc differ
 
src/__pycache__/legal_implications.cpython-39.pyc ADDED
Binary file (2.87 kB). View file
 
src/__pycache__/llm_utils.cpython-39.pyc CHANGED
Binary files a/src/__pycache__/llm_utils.cpython-39.pyc and b/src/__pycache__/llm_utils.cpython-39.pyc differ
 
src/__pycache__/prompts.cpython-39.pyc CHANGED
Binary files a/src/__pycache__/prompts.cpython-39.pyc and b/src/__pycache__/prompts.cpython-39.pyc differ
 
src/__pycache__/summarization.cpython-39.pyc CHANGED
Binary files a/src/__pycache__/summarization.cpython-39.pyc and b/src/__pycache__/summarization.cpython-39.pyc differ
 
src/gradio_app.py CHANGED
@@ -6,9 +6,9 @@ import gradio as gr
6
  from langchain.chat_models import ChatOpenAI
7
  from src.summarization import (
8
  parallel_summarization,
9
- parallel_legal_implications,
10
  PARALLEL_SUMMARIZATION_MAPPING,
11
  )
 
12
  from src.mailing import send_email
13
 
14
 
@@ -89,7 +89,7 @@ def load_summary_section(llm: ChatOpenAI):
89
  summary_parallel_button = gr.Button(
90
  "Parallel Summary", interactive=False
91
  )
92
- clear = gr.Button("Clear All Components")
93
  with gr.Column(scale=2):
94
  sections_to_select = [
95
  i for i in PARALLEL_SUMMARIZATION_MAPPING.keys() if "I." not in i
@@ -156,12 +156,12 @@ def load_summary_section(llm: ChatOpenAI):
156
  ).then(
157
  switch_buttons,
158
  [gr.State(True)],
159
- [summary_parallel_button, gr.State(None), gr.State(None)],
160
  queue=False,
161
  ).then
162
 
163
  # The clear button clears the dashboard
164
- clear.click(lambda: None, None, summary_output, queue=False).then(
165
  lambda: None, None, file_upload_summary, queue=False
166
  ).then(lambda: None, None, summary_show_pdf, queue=False).then(
167
  lambda: None, None, send_email_button, queue=False
@@ -185,11 +185,12 @@ def load_summary_section(llm: ChatOpenAI):
185
  return summary_section
186
 
187
 
188
- def load_legal_implications_section(llm: ChatOpenAI):
189
  """Load the legal implications section
190
 
191
  Args:
192
  llm (ChatOpenAI): Language model.
 
193
 
194
  Returns:
195
  gr.Block: Legal Implications Section
@@ -227,7 +228,9 @@ def load_legal_implications_section(llm: ChatOpenAI):
227
  subject_email_legal_implications = gr.Textbox(
228
  label="Subject", placeholder="Enter Subject"
229
  )
230
- send_email_button = gr.Button("Open Email", interactive=False)
 
 
231
  with gr.Column(scale=3):
232
  email_instructions_legal_implications = gr.Textbox(
233
  label="Email Instructions",
@@ -260,13 +263,45 @@ def load_legal_implications_section(llm: ChatOpenAI):
260
  queue=False,
261
  ).then(
262
  parallel_legal_implications,
263
- [file_upload_legal_implications, gr.State([llm])],
 
 
 
 
264
  [legal_implications_output],
265
  queue=False,
266
  ).then(
267
  switch_buttons,
268
  [gr.State(True)],
269
- [extract_legal_implications_button, gr.State(None), gr.State(None)],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  queue=False,
271
  )
272
 
@@ -298,7 +333,9 @@ def run_summarization_model_gradio(
298
  with gr.Tab("Summarize Verdict"):
299
  load_summary_section(llm=llm)
300
  with gr.Tab("Legal Implications"):
301
- load_legal_implications_section(llm=llm)
 
 
302
 
303
  webui.queue()
304
 
 
6
  from langchain.chat_models import ChatOpenAI
7
  from src.summarization import (
8
  parallel_summarization,
 
9
  PARALLEL_SUMMARIZATION_MAPPING,
10
  )
11
+ from src.legal_implications import parallel_legal_implications
12
  from src.mailing import send_email
13
 
14
 
 
89
  summary_parallel_button = gr.Button(
90
  "Parallel Summary", interactive=False
91
  )
92
+ clear_button = gr.Button("Clear All Components")
93
  with gr.Column(scale=2):
94
  sections_to_select = [
95
  i for i in PARALLEL_SUMMARIZATION_MAPPING.keys() if "I." not in i
 
156
  ).then(
157
  switch_buttons,
158
  [gr.State(True)],
159
+ [summary_parallel_button, send_email_button, gr.State(None)],
160
  queue=False,
161
  ).then
162
 
163
  # The clear button clears the dashboard
164
+ clear_button.click(lambda: None, None, summary_output, queue=False).then(
165
  lambda: None, None, file_upload_summary, queue=False
166
  ).then(lambda: None, None, summary_show_pdf, queue=False).then(
167
  lambda: None, None, send_email_button, queue=False
 
185
  return summary_section
186
 
187
 
188
+ def load_legal_implications_section(llm: ChatOpenAI, summarization_kwargs: dict = {}):
189
  """Load the legal implications section
190
 
191
  Args:
192
  llm (ChatOpenAI): Language model.
193
+ summarization_kwargs (dict, optional): Keyword arguments for the summarization. Defaults to {}.
194
 
195
  Returns:
196
  gr.Block: Legal Implications Section
 
228
  subject_email_legal_implications = gr.Textbox(
229
  label="Subject", placeholder="Enter Subject"
230
  )
231
+ send_email_button_legal_implications = gr.Button(
232
+ "Open Email", interactive=False
233
+ )
234
  with gr.Column(scale=3):
235
  email_instructions_legal_implications = gr.Textbox(
236
  label="Email Instructions",
 
263
  queue=False,
264
  ).then(
265
  parallel_legal_implications,
266
+ [
267
+ file_upload_legal_implications,
268
+ gr.State([llm]),
269
+ gr.State(summarization_kwargs),
270
+ ],
271
  [legal_implications_output],
272
  queue=False,
273
  ).then(
274
  switch_buttons,
275
  [gr.State(True)],
276
+ [
277
+ extract_legal_implications_button,
278
+ send_email_button_legal_implications,
279
+ gr.State(None),
280
+ ],
281
+ queue=False,
282
+ )
283
+ # The clear button clears the dashboard
284
+ clear_legal_implications_button.click(
285
+ lambda: None, None, legal_implications_output, queue=False
286
+ ).then(lambda: None, None, file_upload_legal_implications, queue=False).then(
287
+ lambda: None, None, legal_implications_show_pdf, queue=False
288
+ ).then(
289
+ lambda: None, None, send_email_button_legal_implications, queue=False
290
+ ).then(
291
+ lambda: None, None, email_instructions_legal_implications, queue=False
292
+ ).then(
293
+ lambda: None, None, recipiant_email_legal_implications, queue=False
294
+ )
295
+
296
+ # Email button click opens the default email client and fills in the email instructions
297
+ send_email_button_legal_implications.click(
298
+ send_email,
299
+ [
300
+ legal_implications_output,
301
+ recipiant_email_legal_implications,
302
+ subject_email_legal_implications,
303
+ email_instructions_legal_implications,
304
+ ],
305
  queue=False,
306
  )
307
 
 
333
  with gr.Tab("Summarize Verdict"):
334
  load_summary_section(llm=llm)
335
  with gr.Tab("Legal Implications"):
336
+ load_legal_implications_section(
337
+ llm=llm, summarization_kwargs=summarization_kwargs
338
+ )
339
 
340
  webui.queue()
341
 
src/legal_implications.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chat_models import ChatOpenAI
2
+ from src.prompts import (
3
+ prompts_parallel_legal_implications,
4
+ )
5
+ from src.doc_loading import load_docs
6
+ from src.llm_utils import async_generate_summary_chain
7
+ import time
8
+ from typing import List
9
+ import asyncio
10
+
11
+
12
+ async def generate_legal_implications_concurrently(
13
+ file_paths: List[str],
14
+ llm: ChatOpenAI,
15
+ summarization_kwargs: dict = {"chain_type": "map_reduce"},
16
+ ) -> List[dict]:
17
+ """Parallel legal implications extraction. This function is used to run the prompt for different docs in parallel.
18
+
19
+ Args:
20
+ file_paths (List[str]): List of file paths. This can either be a local path or a tempfile.TemporaryFileWrapper_.
21
+ llm (ChatOpenAI): Language model to use for the legal implications.
22
+ summarization_kwargs: Keyword arguments for the summarization.
23
+
24
+ Returns:
25
+ dict: Mapping of file name to its legal implications (the per-file results merged into one dictionary).
26
+ """
27
+ default_summarization_kwargs = dict(
28
+ map_prompt=prompts_parallel_legal_implications["map_prompt"],
29
+ combine_prompt=prompts_parallel_legal_implications["combine_prompt"],
30
+ )
31
+ default_summarization_kwargs.update(summarization_kwargs)
32
+
33
+ # create parallel tasks
34
+ tasks = []
35
+
36
+ for file_path in file_paths:
37
+ docs = load_docs(file_path=file_path, with_pageinfo=False)
38
+
39
+ tasks.append(
40
+ async_generate_summary_chain(
41
+ llm=llm,
42
+ docs=docs,
43
+ summarization_kwargs=default_summarization_kwargs,
44
+ k=file_path.split("/")[-1],
45
+ )
46
+ )
47
+ print(f"Appending task for legal implications: {file_path}")
48
+
49
+ print("-------------------")
50
+ # execute all coroutines concurrently
51
+ values = await asyncio.gather(*tasks)
52
+
53
+ # report return values
54
+ values_flattened = {}
55
+ for v in values:
56
+ values_flattened.update(v)
57
+ return values_flattened
58
+
59
+
60
+ def parallel_legal_implications(
61
+ files: str, llm: ChatOpenAI, summarization_kwargs: dict = {}
62
+ ) -> str:
63
+ """Wrapper for the parallel legal implication extraction function to make it compatible with gradio.
64
+
65
+ Args:
66
+ files: Uploaded file objects; the `.name` attribute of each entry is used as the file path.
67
+ llm: List wrapping the language model (ChatOpenAI); only the first element, llm[0], is used.
68
+ summarization_kwargs (dict): Keyword arguments for the summarization.
69
+
70
+ Returns:
71
+ str: Legal Implications of the file.
72
+ """
73
+ now = time.time()
74
+ values_flattened = asyncio.run(
75
+ generate_legal_implications_concurrently(
76
+ file_paths=[f.name for f in files],
77
+ llm=llm[0],
78
+ summarization_kwargs=summarization_kwargs,
79
+ )
80
+ )
81
+ print("Time taken for complete legal implications: ", time.time() - now)
82
+ output = "Die folgenden rechtlich relevanten Fakten wurden gefunden:\n\n\n\n"
83
+ for file_name, legal_implications in values_flattened.items():
84
+ output += f"Rechtlich relevanten Fakten für {file_name.capitalize()}:\n\n{legal_implications}\n\n\n"
85
+
86
+ return output
src/llm_utils.py CHANGED
@@ -3,9 +3,10 @@ from langchain.chat_models import ChatOpenAI
3
  from langchain.docstore.document import Document
4
  import time
5
  from typing import List
 
6
 
7
 
8
- async def async_generate(
9
  llm: ChatOpenAI, docs: List[Document], llm_kwargs: dict, k: str
10
  ) -> dict:
11
  """Asynchronous LLMChain function.
@@ -26,3 +27,28 @@ async def async_generate(
26
  resp = await chain.arun(text=docs)
27
  print(f"Time taken for {k}: ", time.time() - now)
28
  return {k: resp}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  from langchain.docstore.document import Document
4
  import time
5
  from typing import List
6
+ from langchain.chains.summarize import load_summarize_chain
7
 
8
 
9
+ async def async_generate_llmchain(
10
  llm: ChatOpenAI, docs: List[Document], llm_kwargs: dict, k: str
11
  ) -> dict:
12
  """Asynchronous LLMChain function.
 
27
  resp = await chain.arun(text=docs)
28
  print(f"Time taken for {k}: ", time.time() - now)
29
  return {k: resp}
30
+
31
+
32
+ async def async_generate_summary_chain(
33
+ llm: ChatOpenAI, docs: List[Document], summarization_kwargs: dict, k: str
34
+ ) -> dict:
35
+ """Asynchronous summarization chain function (built with load_summarize_chain).
36
+
37
+ Args:
38
+ llm (ChatOpenAI): Language model to use.
39
+ docs (List[Document]): List of documents.
40
+ summarization_kwargs (dict): Keyword arguments for the load_summarize_chain.
41
+ k (str): Key for a dictionary under which the output is returned.
42
+
43
+ Returns:
44
+ dict: Dictionary with the summarization.
45
+ """
46
+ print(f"Starting summarization for {k}")
47
+ now = time.time()
48
+ chain = load_summarize_chain(
49
+ llm=llm,
50
+ **summarization_kwargs,
51
+ )
52
+ resp = await chain.arun(docs)
53
+ print(f"Time taken for {k}: ", time.time() - now)
54
+ return {k: resp}
src/prompts.py CHANGED
@@ -176,7 +176,7 @@ def get_template_parallel(name: str, headline: str, additional_text: str = ""):
176
  )
177
 
178
 
179
- prompts_parallel = {
180
  "intro": PromptTemplate(
181
  input_variables=["text"],
182
  template=get_template_parallel(name="Einleitung", headline="I. Einleitung"),
@@ -268,3 +268,38 @@ prompts_parallel = {
268
  ),
269
  ),
270
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  )
177
 
178
 
179
+ prompts_parallel_summary = {
180
  "intro": PromptTemplate(
181
  input_variables=["text"],
182
  template=get_template_parallel(name="Einleitung", headline="I. Einleitung"),
 
268
  ),
269
  ),
270
  }
271
+
272
+ prompts_parallel_legal_implications = {
273
+ "map_prompt": PromptTemplate(
274
+ input_variables=["text"],
275
+ template="""
276
+ Der folgende Kontext wird durch dreifache Anführungszeichen begrenzt.
277
+
278
+ Kontext:
279
+ ```{text}```
280
+
281
+ Ein Anwalt berät einen Mandanten und möchte wissen, welche rechtlichen Implikationen der Kontext für sein Unternehmen hat.
282
+ Extrahiere alle wichtigen rechtlich relevanten Informationen.
283
+ Die rechtlich relevanten Informationen müssen zu 100% korrekt sein und schreib nur kurze präzise Stichpunkte!
284
+ Wenn keine rechtlich relevanten Informationen vorhanden sind, schreib: 'Keine rechtlich relevanten Informationen vorhanden.'
285
+
286
+ Rechtlich relevante Informationen als Stichpunkte:
287
+ """,
288
+ ),
289
+ ##### SHORT COMBINE
290
+ "combine_prompt": PromptTemplate(
291
+ input_variables=["text"],
292
+ template="""
293
+ Die folgenden rechlich relvanten Fakten als Stichpunkte sind durch dreifache Anführungszeichen begrenzt.
294
+
295
+ Rechlich relvanten Fakten:
296
+ ```{text}```
297
+
298
+ Schreibe einen rechtlich korrekten Text, der die rechlich relvanten Fakten auflistet.
299
+ Schreibe zu jedem rechtlich relevantem Fakt einen kurzen Paragraphen mit sehr wenig Sätzen, der erklärt warum dies rechtlich relevant ist.
300
+ Der Text muss zu 100% korrekt sein!
301
+
302
+ Rechtliche relevante Fakten mit kurzen Erklärungen:
303
+ """,
304
+ ),
305
+ }
src/summarization.py CHANGED
@@ -1,8 +1,11 @@
1
  from langchain.chains.summarize import load_summarize_chain
2
  from langchain.chat_models import ChatOpenAI
3
- from src.prompts import prompts, prompts_parallel
 
 
 
4
  from src.doc_loading import load_docs
5
- from src.llm_utils import async_generate
6
  import time
7
  from typing import Dict, List
8
  import asyncio
@@ -95,9 +98,11 @@ async def generate_summary_concurrently(
95
  for k in PARALLEL_SUMMARIZATION_ORDER:
96
  if PARALLEL_SUMMARIZATION_MAPPING_INVERSE.get(k, k) in sections:
97
  sk = summarization_kwargs.copy()
98
- sk["prompt"] = prompts_parallel[k]
99
- print(f"Appending task for {k}")
100
- tasks.append(async_generate(llm=llm, docs=docs, llm_kwargs=sk, k=k))
 
 
101
  print("-------------------")
102
  # execute all coroutines concurrently
103
  values = await asyncio.gather(*tasks)
@@ -173,16 +178,3 @@ def parallel_summarization(file: str, sections: List[str], llm: ChatOpenAI) -> s
173
  )
174
 
175
  return output
176
-
177
-
178
- def parallel_legal_implications(file: str, llm: ChatOpenAI) -> str:
179
- """Wrapper for the parallel legal implication extraction function to make it compatible with gradio.
180
-
181
- Args:
182
- file (str): Path to the file. This can either be a local path or a tempfile.TemporaryFileWrapper_.
183
- llm (ChatOpenAI): Language model.
184
-
185
- Returns:
186
- str: Legal Implications of the file.
187
- """
188
- return "TBD"
 
1
  from langchain.chains.summarize import load_summarize_chain
2
  from langchain.chat_models import ChatOpenAI
3
+ from src.prompts import (
4
+ prompts,
5
+ prompts_parallel_summary,
6
+ )
7
  from src.doc_loading import load_docs
8
+ from src.llm_utils import async_generate_llmchain
9
  import time
10
  from typing import Dict, List
11
  import asyncio
 
98
  for k in PARALLEL_SUMMARIZATION_ORDER:
99
  if PARALLEL_SUMMARIZATION_MAPPING_INVERSE.get(k, k) in sections:
100
  sk = summarization_kwargs.copy()
101
+ sk["prompt"] = prompts_parallel_summary[k]
102
+ print(f"Appending task for summary: {k}")
103
+ tasks.append(
104
+ async_generate_llmchain(llm=llm, docs=docs, llm_kwargs=sk, k=k)
105
+ )
106
  print("-------------------")
107
  # execute all coroutines concurrently
108
  values = await asyncio.gather(*tasks)
 
178
  )
179
 
180
  return output