prototype / src /legal_implications.py
fvde's picture
Upload folder using huggingface_hub
c6d3d04
import asyncio
import os
import time
from typing import List, Optional

from langchain.chat_models import ChatOpenAI

from src.doc_loading import load_docs
from src.llm_utils import async_generate_summary_chain
from src.prompts import (
    prompts_parallel_legal_implications,
)
async def generate_legal_implications_concurrently(
    file_paths: List[str],
    llm: ChatOpenAI,
    summarization_kwargs: Optional[dict] = None,
) -> dict:
    """Extract legal implications from several documents concurrently.

    One summarization task is created per file; all tasks are then awaited
    together with ``asyncio.gather``. Document loading itself happens
    synchronously, file by file, while the tasks are being created.

    Args:
        file_paths (List[str]): Paths of the files to process. Each entry must
            be a path string (for uploaded temp files, pass their ``.name``).
        llm (ChatOpenAI): Language model to use for the legal implications.
        summarization_kwargs (Optional[dict]): Keyword arguments for the
            summarization. They are laid over the default map/combine prompts,
            so they can override them. When omitted or ``None``,
            ``{"chain_type": "map_reduce"}`` is used.

    Returns:
        dict: The dicts returned by the individual tasks merged into a single
            dict. Entries with the same key overwrite each other (the last
            task wins).
    """
    # Use None as the sentinel instead of a dict literal in the signature, so
    # no dict object is shared between calls.
    if summarization_kwargs is None:
        summarization_kwargs = {"chain_type": "map_reduce"}

    # NOTE(review): a caller-supplied dict (even an empty one) replaces the
    # default above entirely, so "chain_type" is then absent unless the caller
    # sets it -- confirm async_generate_summary_chain tolerates that.
    effective_kwargs = {
        "map_prompt": prompts_parallel_legal_implications["map_prompt"],
        "combine_prompt": prompts_parallel_legal_implications["combine_prompt"],
    }
    effective_kwargs.update(summarization_kwargs)

    # Create one summarization task per file.
    tasks = []
    for file_path in file_paths:
        docs = load_docs(file_path=file_path, with_pageinfo=False)
        tasks.append(
            async_generate_summary_chain(
                llm=llm,
                docs=docs,
                summarization_kwargs=effective_kwargs,
                # The bare file name identifies the task; basename also copes
                # with platform-specific path separators.
                k=os.path.basename(file_path),
            )
        )
        print(f"Appending task for legal implications: {file_path}")
        print("-------------------")

    # Run all tasks concurrently; gather returns results in task order.
    results = await asyncio.gather(*tasks)

    # Merge the per-file dicts into one flat dict.
    values_flattened = {}
    for result in results:
        values_flattened.update(result)
    return values_flattened
def parallel_legal_implications(
    files: list, llm: List[ChatOpenAI], summarization_kwargs: Optional[dict] = None
) -> str:
    """Wrapper for the parallel legal implication extraction to make it compatible with gradio.

    Runs ``generate_legal_implications_concurrently`` to completion in a new
    event loop and formats the result as one report string.

    Args:
        files (list): Uploaded file objects; only their ``.name`` attribute
            (the path on disk) is read.
        llm (List[ChatOpenAI]): Indexable container whose first element is the
            language model to use; only ``llm[0]`` is read.
        summarization_kwargs (Optional[dict]): Keyword arguments for the
            summarization. ``None`` (the default) is treated as an empty dict.

    Returns:
        str: Report header followed by one section per entry of the result
            dict (capitalized key as title, value as body).
    """
    # Forward an explicit empty dict rather than None so that omitting the
    # argument behaves exactly like the former ``{}`` default did.
    if summarization_kwargs is None:
        summarization_kwargs = {}

    # perf_counter is monotonic, so the elapsed time cannot be distorted by
    # system clock adjustments.
    started = time.perf_counter()
    values_flattened = asyncio.run(
        generate_legal_implications_concurrently(
            file_paths=[f.name for f in files],
            llm=llm[0],
            summarization_kwargs=summarization_kwargs,
        )
    )
    print(
        "Time taken for complete legal implications: ",
        time.perf_counter() - started,
    )

    # Build all sections first and join once instead of repeated "+=".
    sections = [
        f"Rechtlich relevanten Fakten für {file_name.capitalize()}:\n\n{legal_implications}\n\n\n"
        for file_name, legal_implications in values_flattened.items()
    ]
    return (
        "Die folgenden rechtlich relevanten Fakten wurden gefunden:\n\n\n\n"
        + "".join(sections)
    )