Upload folder using huggingface_hub
Browse files
- app.py +4 -10
- src/__pycache__/gradio_app.cpython-39.pyc +0 -0
- src/__pycache__/legal_implications.cpython-39.pyc +0 -0
- src/__pycache__/llm_utils.cpython-39.pyc +0 -0
- src/__pycache__/prompts.cpython-39.pyc +0 -0
- src/__pycache__/summarization.cpython-39.pyc +0 -0
- src/gradio_app.py +46 -9
- src/legal_implications.py +86 -0
- src/llm_utils.py +27 -1
- src/prompts.py +36 -1
- src/summarization.py +10 -18
app.py
CHANGED
|
@@ -19,19 +19,13 @@ if __name__ == "__main__":
|
|
| 19 |
args = parser.parse_args()
|
| 20 |
|
| 21 |
# Default configuration for summarization
|
| 22 |
-
summarization_default_kwargs = dict(
|
| 23 |
-
chain_type="map_reduce",
|
| 24 |
-
map_prompt=prompts["short_de"]["map_prompt"],
|
| 25 |
-
combine_prompt=prompts["short_de"]["combine_prompt"],
|
| 26 |
-
)
|
| 27 |
-
|
| 28 |
# Load configuration from a configuration file
|
| 29 |
with open(args.configuration_file, "r") as cgf:
|
| 30 |
cgf_kwargs = json.load(cgf)
|
| 31 |
-
summarization_kwargs = cgf_kwargs.get("summarization_kwargs", {})
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
|
|
|
| 35 |
|
| 36 |
# Load the language model
|
| 37 |
llm = load_open_ai_llm(
|
|
@@ -42,6 +36,6 @@ if __name__ == "__main__":
|
|
| 42 |
run_summarization_model_gradio(
|
| 43 |
llm=llm,
|
| 44 |
share_gradio_via_link=cgf_kwargs.get("share_gradio_via_link", False),
|
| 45 |
-
summarization_kwargs=
|
| 46 |
run_local=cgf_kwargs.get("run_local", True),
|
| 47 |
)
|
|
|
|
| 19 |
args = parser.parse_args()
|
| 20 |
|
| 21 |
# Default configuration for summarization
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# Load configuration from a configuration file
|
| 23 |
with open(args.configuration_file, "r") as cgf:
|
| 24 |
cgf_kwargs = json.load(cgf)
|
|
|
|
| 25 |
|
| 26 |
+
summarization_kwargs = cgf_kwargs.get(
|
| 27 |
+
"summarization_kwargs", {"chain_type": "map_reduce"}
|
| 28 |
+
)
|
| 29 |
|
| 30 |
# Load the language model
|
| 31 |
llm = load_open_ai_llm(
|
|
|
|
| 36 |
run_summarization_model_gradio(
|
| 37 |
llm=llm,
|
| 38 |
share_gradio_via_link=cgf_kwargs.get("share_gradio_via_link", False),
|
| 39 |
+
summarization_kwargs=summarization_kwargs,
|
| 40 |
run_local=cgf_kwargs.get("run_local", True),
|
| 41 |
)
|
src/__pycache__/gradio_app.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/gradio_app.cpython-39.pyc and b/src/__pycache__/gradio_app.cpython-39.pyc differ
|
|
|
src/__pycache__/legal_implications.cpython-39.pyc
ADDED
|
Binary file (2.87 kB). View file
|
|
|
src/__pycache__/llm_utils.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/llm_utils.cpython-39.pyc and b/src/__pycache__/llm_utils.cpython-39.pyc differ
|
|
|
src/__pycache__/prompts.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/prompts.cpython-39.pyc and b/src/__pycache__/prompts.cpython-39.pyc differ
|
|
|
src/__pycache__/summarization.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/summarization.cpython-39.pyc and b/src/__pycache__/summarization.cpython-39.pyc differ
|
|
|
src/gradio_app.py
CHANGED
|
@@ -6,9 +6,9 @@ import gradio as gr
|
|
| 6 |
from langchain.chat_models import ChatOpenAI
|
| 7 |
from src.summarization import (
|
| 8 |
parallel_summarization,
|
| 9 |
-
parallel_legal_implications,
|
| 10 |
PARALLEL_SUMMARIZATION_MAPPING,
|
| 11 |
)
|
|
|
|
| 12 |
from src.mailing import send_email
|
| 13 |
|
| 14 |
|
|
@@ -89,7 +89,7 @@ def load_summary_section(llm: ChatOpenAI):
|
|
| 89 |
summary_parallel_button = gr.Button(
|
| 90 |
"Parallel Summary", interactive=False
|
| 91 |
)
|
| 92 |
-
|
| 93 |
with gr.Column(scale=2):
|
| 94 |
sections_to_select = [
|
| 95 |
i for i in PARALLEL_SUMMARIZATION_MAPPING.keys() if "I." not in i
|
|
@@ -156,12 +156,12 @@ def load_summary_section(llm: ChatOpenAI):
|
|
| 156 |
).then(
|
| 157 |
switch_buttons,
|
| 158 |
[gr.State(True)],
|
| 159 |
-
[summary_parallel_button,
|
| 160 |
queue=False,
|
| 161 |
).then
|
| 162 |
|
| 163 |
# The clear button clears the dashboard
|
| 164 |
-
|
| 165 |
lambda: None, None, file_upload_summary, queue=False
|
| 166 |
).then(lambda: None, None, summary_show_pdf, queue=False).then(
|
| 167 |
lambda: None, None, send_email_button, queue=False
|
|
@@ -185,11 +185,12 @@ def load_summary_section(llm: ChatOpenAI):
|
|
| 185 |
return summary_section
|
| 186 |
|
| 187 |
|
| 188 |
-
def load_legal_implications_section(llm: ChatOpenAI):
|
| 189 |
"""Load the legal implications section
|
| 190 |
|
| 191 |
Args:
|
| 192 |
llm (ChatOpenAI): Language model.
|
|
|
|
| 193 |
|
| 194 |
Returns:
|
| 195 |
gr.Block: Legal Implications Section
|
|
@@ -227,7 +228,9 @@ def load_legal_implications_section(llm: ChatOpenAI):
|
|
| 227 |
subject_email_legal_implications = gr.Textbox(
|
| 228 |
label="Subject", placeholder="Enter Subject"
|
| 229 |
)
|
| 230 |
-
|
|
|
|
|
|
|
| 231 |
with gr.Column(scale=3):
|
| 232 |
email_instructions_legal_implications = gr.Textbox(
|
| 233 |
label="Email Instructions",
|
|
@@ -260,13 +263,45 @@ def load_legal_implications_section(llm: ChatOpenAI):
|
|
| 260 |
queue=False,
|
| 261 |
).then(
|
| 262 |
parallel_legal_implications,
|
| 263 |
-
[
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
[legal_implications_output],
|
| 265 |
queue=False,
|
| 266 |
).then(
|
| 267 |
switch_buttons,
|
| 268 |
[gr.State(True)],
|
| 269 |
-
[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
queue=False,
|
| 271 |
)
|
| 272 |
|
|
@@ -298,7 +333,9 @@ def run_summarization_model_gradio(
|
|
| 298 |
with gr.Tab("Summarize Verdict"):
|
| 299 |
load_summary_section(llm=llm)
|
| 300 |
with gr.Tab("Legal Implications"):
|
| 301 |
-
load_legal_implications_section(
|
|
|
|
|
|
|
| 302 |
|
| 303 |
webui.queue()
|
| 304 |
|
|
|
|
| 6 |
from langchain.chat_models import ChatOpenAI
|
| 7 |
from src.summarization import (
|
| 8 |
parallel_summarization,
|
|
|
|
| 9 |
PARALLEL_SUMMARIZATION_MAPPING,
|
| 10 |
)
|
| 11 |
+
from src.legal_implications import parallel_legal_implications
|
| 12 |
from src.mailing import send_email
|
| 13 |
|
| 14 |
|
|
|
|
| 89 |
summary_parallel_button = gr.Button(
|
| 90 |
"Parallel Summary", interactive=False
|
| 91 |
)
|
| 92 |
+
clear_button = gr.Button("Clear All Components")
|
| 93 |
with gr.Column(scale=2):
|
| 94 |
sections_to_select = [
|
| 95 |
i for i in PARALLEL_SUMMARIZATION_MAPPING.keys() if "I." not in i
|
|
|
|
| 156 |
).then(
|
| 157 |
switch_buttons,
|
| 158 |
[gr.State(True)],
|
| 159 |
+
[summary_parallel_button, send_email_button, gr.State(None)],
|
| 160 |
queue=False,
|
| 161 |
).then
|
| 162 |
|
| 163 |
# The clear button clears the dashboard
|
| 164 |
+
clear_button.click(lambda: None, None, summary_output, queue=False).then(
|
| 165 |
lambda: None, None, file_upload_summary, queue=False
|
| 166 |
).then(lambda: None, None, summary_show_pdf, queue=False).then(
|
| 167 |
lambda: None, None, send_email_button, queue=False
|
|
|
|
| 185 |
return summary_section
|
| 186 |
|
| 187 |
|
| 188 |
+
def load_legal_implications_section(llm: ChatOpenAI, summarization_kwargs: dict = {}):
|
| 189 |
"""Load the legal implications section
|
| 190 |
|
| 191 |
Args:
|
| 192 |
llm (ChatOpenAI): Language model.
|
| 193 |
+
summarization_kwargs (dict, optional): Keyword arguments for the summarization. Defaults to {}.
|
| 194 |
|
| 195 |
Returns:
|
| 196 |
gr.Block: Legal Implications Section
|
|
|
|
| 228 |
subject_email_legal_implications = gr.Textbox(
|
| 229 |
label="Subject", placeholder="Enter Subject"
|
| 230 |
)
|
| 231 |
+
send_email_button_legal_implications = gr.Button(
|
| 232 |
+
"Open Email", interactive=False
|
| 233 |
+
)
|
| 234 |
with gr.Column(scale=3):
|
| 235 |
email_instructions_legal_implications = gr.Textbox(
|
| 236 |
label="Email Instructions",
|
|
|
|
| 263 |
queue=False,
|
| 264 |
).then(
|
| 265 |
parallel_legal_implications,
|
| 266 |
+
[
|
| 267 |
+
file_upload_legal_implications,
|
| 268 |
+
gr.State([llm]),
|
| 269 |
+
gr.State(summarization_kwargs),
|
| 270 |
+
],
|
| 271 |
[legal_implications_output],
|
| 272 |
queue=False,
|
| 273 |
).then(
|
| 274 |
switch_buttons,
|
| 275 |
[gr.State(True)],
|
| 276 |
+
[
|
| 277 |
+
extract_legal_implications_button,
|
| 278 |
+
send_email_button_legal_implications,
|
| 279 |
+
gr.State(None),
|
| 280 |
+
],
|
| 281 |
+
queue=False,
|
| 282 |
+
)
|
| 283 |
+
# The clear button clears the dashboard
|
| 284 |
+
clear_legal_implications_button.click(
|
| 285 |
+
lambda: None, None, legal_implications_output, queue=False
|
| 286 |
+
).then(lambda: None, None, file_upload_legal_implications, queue=False).then(
|
| 287 |
+
lambda: None, None, legal_implications_show_pdf, queue=False
|
| 288 |
+
).then(
|
| 289 |
+
lambda: None, None, send_email_button_legal_implications, queue=False
|
| 290 |
+
).then(
|
| 291 |
+
lambda: None, None, email_instructions_legal_implications, queue=False
|
| 292 |
+
).then(
|
| 293 |
+
lambda: None, None, recipiant_email_legal_implications, queue=False
|
| 294 |
+
)
|
| 295 |
+
|
| 296 |
+
# Email button click opens the default email client and fills in the email instructions
|
| 297 |
+
send_email_button_legal_implications.click(
|
| 298 |
+
send_email,
|
| 299 |
+
[
|
| 300 |
+
legal_implications_output,
|
| 301 |
+
recipiant_email_legal_implications,
|
| 302 |
+
subject_email_legal_implications,
|
| 303 |
+
email_instructions_legal_implications,
|
| 304 |
+
],
|
| 305 |
queue=False,
|
| 306 |
)
|
| 307 |
|
|
|
|
| 333 |
with gr.Tab("Summarize Verdict"):
|
| 334 |
load_summary_section(llm=llm)
|
| 335 |
with gr.Tab("Legal Implications"):
|
| 336 |
+
load_legal_implications_section(
|
| 337 |
+
llm=llm, summarization_kwargs=summarization_kwargs
|
| 338 |
+
)
|
| 339 |
|
| 340 |
webui.queue()
|
| 341 |
|
src/legal_implications.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.chat_models import ChatOpenAI
|
| 2 |
+
from src.prompts import (
|
| 3 |
+
prompts_parallel_legal_implications,
|
| 4 |
+
)
|
| 5 |
+
from src.doc_loading import load_docs
|
| 6 |
+
from src.llm_utils import async_generate_summary_chain
|
| 7 |
+
import time
|
| 8 |
+
from typing import List
|
| 9 |
+
import asyncio
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
async def generate_legal_implications_concurrently(
    file_paths: List[str],
    llm: ChatOpenAI,
    summarization_kwargs: dict = None,
) -> dict:
    """Extract the legal implications of several documents concurrently.

    One summarize-chain coroutine is created per file and all of them are
    awaited together with ``asyncio.gather``.

    Args:
        file_paths (List[str]): Paths of the documents to process.
        llm (ChatOpenAI): Language model to use for the legal implications.
        summarization_kwargs (dict, optional): Keyword arguments forwarded to
            the summarize chain. They override the defaults built here
            (``chain_type="map_reduce"`` plus the legal-implication map and
            combine prompts). ``None`` or an empty dict means "defaults only".

    Returns:
        dict: Maps each file's base name to the text produced for that file.
    """
    import os  # local import: only needed for the portable base-name lookup

    # The chain type lives in the defaults so that the map/combine prompts
    # are always paired with the chain type they were written for, even when
    # the caller passes an empty kwargs dict.
    chain_kwargs = {
        "chain_type": "map_reduce",
        "map_prompt": prompts_parallel_legal_implications["map_prompt"],
        "combine_prompt": prompts_parallel_legal_implications["combine_prompt"],
    }
    chain_kwargs.update(summarization_kwargs or {})

    # create parallel tasks
    tasks = []
    for file_path in file_paths:
        docs = load_docs(file_path=file_path, with_pageinfo=False)
        tasks.append(
            async_generate_summary_chain(
                llm=llm,
                docs=docs,
                summarization_kwargs=chain_kwargs,
                # basename instead of split("/") so Windows paths work too
                k=os.path.basename(file_path),
            )
        )
        print(f"Appending task for legal implications: {file_path}")

    print("-------------------")
    # execute all coroutines concurrently
    values = await asyncio.gather(*tasks)

    # Merge the per-file {name: text} dicts into one.
    # NOTE(review): two files with the same base name overwrite each other.
    values_flattened = {}
    for v in values:
        values_flattened.update(v)
    return values_flattened
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def parallel_legal_implications(
    files: list, llm: List[ChatOpenAI], summarization_kwargs: dict = None
) -> str:
    """Gradio-compatible wrapper around the concurrent legal-implication extraction.

    Args:
        files (list): Uploaded file objects. Only their ``name`` attribute is
            read; it is passed on as the file path.
        llm (List[ChatOpenAI]): Sequence whose first element is the language
            model (``llm[0]`` is used).
        summarization_kwargs (dict, optional): Keyword arguments for the
            summarization. ``None`` or an empty dict leaves the defaults of
            ``generate_legal_implications_concurrently`` in effect.

    Returns:
        str: A header line followed by one section per file with the legal
        implications found for it.
    """
    # Forward the kwargs only when the caller supplied some. Passing an empty
    # dict would override the callee's own default instead of falling back to it.
    extra = {}
    if summarization_kwargs:
        extra["summarization_kwargs"] = summarization_kwargs

    now = time.time()
    values_flattened = asyncio.run(
        generate_legal_implications_concurrently(
            file_paths=[f.name for f in files],
            llm=llm[0],
            **extra,
        )
    )
    print("Time taken for complete legal implications: ", time.time() - now)

    # Build the sections first and join once instead of repeated "+=".
    sections = [
        f"Rechtlich relevanten Fakten für {file_name.capitalize()}:\n\n{legal_implications}\n\n\n"
        for file_name, legal_implications in values_flattened.items()
    ]
    header = "Die folgenden rechtlich relevanten Fakten wurden gefunden:\n\n\n\n"
    return header + "".join(sections)
|
src/llm_utils.py
CHANGED
|
@@ -3,9 +3,10 @@ from langchain.chat_models import ChatOpenAI
|
|
| 3 |
from langchain.docstore.document import Document
|
| 4 |
import time
|
| 5 |
from typing import List
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
-
async def
|
| 9 |
llm: ChatOpenAI, docs: List[Document], llm_kwargs: dict, k: str
|
| 10 |
) -> dict:
|
| 11 |
"""Asyncronous LLMChain function.
|
|
@@ -26,3 +27,28 @@ async def async_generate(
|
|
| 26 |
resp = await chain.arun(text=docs)
|
| 27 |
print(f"Time taken for {k}: ", time.time() - now)
|
| 28 |
return {k: resp}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from langchain.docstore.document import Document
|
| 4 |
import time
|
| 5 |
from typing import List
|
| 6 |
+
from langchain.chains.summarize import load_summarize_chain
|
| 7 |
|
| 8 |
|
| 9 |
+
async def async_generate_llmchain(
|
| 10 |
llm: ChatOpenAI, docs: List[Document], llm_kwargs: dict, k: str
|
| 11 |
) -> dict:
|
| 12 |
"""Asyncronous LLMChain function.
|
|
|
|
| 27 |
resp = await chain.arun(text=docs)
|
| 28 |
print(f"Time taken for {k}: ", time.time() - now)
|
| 29 |
return {k: resp}
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
async def async_generate_summary_chain(
    llm: ChatOpenAI, docs: List[Document], summarization_kwargs: dict, k: str
) -> dict:
    """Run a summarize chain over ``docs`` asynchronously.

    Args:
        llm (ChatOpenAI): Language model to use.
        docs (List[Document]): List of documents.
        summarization_kwargs (dict): Keyword arguments passed on to
            ``load_summarize_chain``.
        k (str): Key under which the chain output is returned.

    Returns:
        dict: Single-entry dictionary ``{k: <chain output>}``.
    """
    print(f"Starting summarization for {k}")
    started_at = time.time()

    summarize_chain = load_summarize_chain(llm=llm, **summarization_kwargs)
    summary = await summarize_chain.arun(docs)

    # Report the wall-clock time spent building and running the chain.
    elapsed = time.time() - started_at
    print(f"Time taken for {k}: ", elapsed)
    return {k: summary}
|
src/prompts.py
CHANGED
|
@@ -176,7 +176,7 @@ def get_template_parallel(name: str, headline: str, additional_text: str = ""):
|
|
| 176 |
)
|
| 177 |
|
| 178 |
|
| 179 |
-
|
| 180 |
"intro": PromptTemplate(
|
| 181 |
input_variables=["text"],
|
| 182 |
template=get_template_parallel(name="Einleitung", headline="I. Einleitung"),
|
|
@@ -268,3 +268,38 @@ prompts_parallel = {
|
|
| 268 |
),
|
| 269 |
),
|
| 270 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
)
|
| 177 |
|
| 178 |
|
| 179 |
+
prompts_parallel_summary = {
|
| 180 |
"intro": PromptTemplate(
|
| 181 |
input_variables=["text"],
|
| 182 |
template=get_template_parallel(name="Einleitung", headline="I. Einleitung"),
|
|
|
|
| 268 |
),
|
| 269 |
),
|
| 270 |
}
|
| 271 |
+
|
| 272 |
+
prompts_parallel_legal_implications = {
|
| 273 |
+
"map_prompt": PromptTemplate(
|
| 274 |
+
input_variables=["text"],
|
| 275 |
+
template="""
|
| 276 |
+
Der folgende Kontext wird durch dreifache Anführungszeichen begrenzt.
|
| 277 |
+
|
| 278 |
+
Kontext:
|
| 279 |
+
```{text}```
|
| 280 |
+
|
| 281 |
+
Ein Anwalt berät einen Mandanten und möchte wissen, welche rechtlichen Implikationen der Kontext für sein Unternehmen hat.
|
| 282 |
+
Extrahiere alle wichtigen rechtlich relevanten Informationen.
|
| 283 |
+
Die rechtlich relevanten Informationen müssen zu 100% korrekt sein und schreib nur kurze präzise Stichpunkte!
|
| 284 |
+
Wenn keine rechtlich relevanten Informationen vorhanden sind, schreib: 'Keine rechtlich relevanten Informationen vorhanden.'
|
| 285 |
+
|
| 286 |
+
Rechtlich relevante Informationen als Stichpunkte:
|
| 287 |
+
""",
|
| 288 |
+
),
|
| 289 |
+
##### SHORT COMBINE
|
| 290 |
+
"combine_prompt": PromptTemplate(
|
| 291 |
+
input_variables=["text"],
|
| 292 |
+
template="""
|
| 293 |
+
Die folgenden rechlich relvanten Fakten als Stichpunkte sind durch dreifache Anführungszeichen begrenzt.
|
| 294 |
+
|
| 295 |
+
Rechlich relvanten Fakten:
|
| 296 |
+
```{text}```
|
| 297 |
+
|
| 298 |
+
Schreibe einen rechtlich korrekten Text, der die rechlich relvanten Fakten auflistet.
|
| 299 |
+
Schreibe zu jedem rechtlich relevantem Fakt einen kurzen Paragraphen mit sehr wenig Sätzen, der erklärt warum dies rechtlich relevant ist.
|
| 300 |
+
Der Text muss zu 100% korrekt sein!
|
| 301 |
+
|
| 302 |
+
Rechtliche relevante Fakten mit kurzen Erklärungen:
|
| 303 |
+
""",
|
| 304 |
+
),
|
| 305 |
+
}
|
src/summarization.py
CHANGED
|
@@ -1,8 +1,11 @@
|
|
| 1 |
from langchain.chains.summarize import load_summarize_chain
|
| 2 |
from langchain.chat_models import ChatOpenAI
|
| 3 |
-
from src.prompts import
|
|
|
|
|
|
|
|
|
|
| 4 |
from src.doc_loading import load_docs
|
| 5 |
-
from src.llm_utils import
|
| 6 |
import time
|
| 7 |
from typing import Dict, List
|
| 8 |
import asyncio
|
|
@@ -95,9 +98,11 @@ async def generate_summary_concurrently(
|
|
| 95 |
for k in PARALLEL_SUMMARIZATION_ORDER:
|
| 96 |
if PARALLEL_SUMMARIZATION_MAPPING_INVERSE.get(k, k) in sections:
|
| 97 |
sk = summarization_kwargs.copy()
|
| 98 |
-
sk["prompt"] =
|
| 99 |
-
print(f"Appending task for {k}")
|
| 100 |
-
tasks.append(
|
|
|
|
|
|
|
| 101 |
print("-------------------")
|
| 102 |
# execute all coroutines concurrently
|
| 103 |
values = await asyncio.gather(*tasks)
|
|
@@ -173,16 +178,3 @@ def parallel_summarization(file: str, sections: List[str], llm: ChatOpenAI) -> s
|
|
| 173 |
)
|
| 174 |
|
| 175 |
return output
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
def parallel_legal_implications(file: str, llm: ChatOpenAI) -> str:
|
| 179 |
-
"""Wrapper for the parallel legal implication extraction function to make it compatible with gradio.
|
| 180 |
-
|
| 181 |
-
Args:
|
| 182 |
-
file (str): Path to the file. This can either be a local path or a tempfile.TemporaryFileWrapper_.
|
| 183 |
-
llm (ChatOpenAI): Language model.
|
| 184 |
-
|
| 185 |
-
Returns:
|
| 186 |
-
str: Legal Implications of the file.
|
| 187 |
-
"""
|
| 188 |
-
return "TBD"
|
|
|
|
| 1 |
from langchain.chains.summarize import load_summarize_chain
|
| 2 |
from langchain.chat_models import ChatOpenAI
|
| 3 |
+
from src.prompts import (
|
| 4 |
+
prompts,
|
| 5 |
+
prompts_parallel_summary,
|
| 6 |
+
)
|
| 7 |
from src.doc_loading import load_docs
|
| 8 |
+
from src.llm_utils import async_generate_llmchain
|
| 9 |
import time
|
| 10 |
from typing import Dict, List
|
| 11 |
import asyncio
|
|
|
|
| 98 |
for k in PARALLEL_SUMMARIZATION_ORDER:
|
| 99 |
if PARALLEL_SUMMARIZATION_MAPPING_INVERSE.get(k, k) in sections:
|
| 100 |
sk = summarization_kwargs.copy()
|
| 101 |
+
sk["prompt"] = prompts_parallel_summary[k]
|
| 102 |
+
print(f"Appending task for summary: {k}")
|
| 103 |
+
tasks.append(
|
| 104 |
+
async_generate_llmchain(llm=llm, docs=docs, llm_kwargs=sk, k=k)
|
| 105 |
+
)
|
| 106 |
print("-------------------")
|
| 107 |
# execute all coroutines concurrently
|
| 108 |
values = await asyncio.gather(*tasks)
|
|
|
|
| 178 |
)
|
| 179 |
|
| 180 |
return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|