"""Concurrent extraction of legal implications from several documents."""

import asyncio
import os
import time
from typing import Any, Dict, List, Optional, Sequence

from langchain.chat_models import ChatOpenAI

from src.doc_loading import load_docs
from src.llm_utils import async_generate_summary_chain
from src.prompts import (
    prompts_parallel_legal_implications,
)


async def generate_legal_implications_concurrently(
    file_paths: List[str],
    llm: ChatOpenAI,
    summarization_kwargs: Optional[dict] = None,
) -> Dict[str, Any]:
    """Run the legal-implications prompt for several documents concurrently.

    One summarization coroutine is created per file and all of them are
    awaited together with ``asyncio.gather``.

    Args:
        file_paths: Paths of the documents to process. Each path is handed
            to ``load_docs`` and its base name is passed to the
            summarization helper as ``k``.
        llm: Language model used for the legal implications.
        summarization_kwargs: Overrides for the summarization settings.
            They are merged on top of the defaults (``chain_type`` of
            ``"map_reduce"`` plus the map/combine prompts from
            ``prompts_parallel_legal_implications``). ``None`` means
            "defaults only".

    Returns:
        A single dict obtained by merging the dict returned for each file.
        Presumably keyed by the file's base name (the ``k`` argument);
        confirm against ``async_generate_summary_chain``. If two tasks
        return the same key, the later one wins.
    """
    # The prompts below are map-reduce prompts, so the matching chain type
    # is part of the base defaults. Previously it lived only in the
    # parameter default and was silently dropped whenever a caller passed
    # its own dict (even an empty one).
    merged_kwargs = {
        "chain_type": "map_reduce",
        "map_prompt": prompts_parallel_legal_implications["map_prompt"],
        "combine_prompt": prompts_parallel_legal_implications["combine_prompt"],
    }
    if summarization_kwargs:
        merged_kwargs.update(summarization_kwargs)

    # Create one coroutine per file; nothing runs until gather() below.
    tasks = []
    for file_path in file_paths:
        docs = load_docs(file_path=file_path, with_pageinfo=False)
        tasks.append(
            async_generate_summary_chain(
                llm=llm,
                docs=docs,
                summarization_kwargs=merged_kwargs,
                # basename() equals split("/")[-1] on POSIX and additionally
                # strips backslash-separated directories on Windows.
                k=os.path.basename(file_path),
            )
        )
        print(f"Appending task for legal implications: {file_path}")
        print("-------------------")

    # Execute all coroutines concurrently.
    values = await asyncio.gather(*tasks)

    # Flatten the per-file dicts into one mapping.
    values_flattened: Dict[str, Any] = {}
    for value in values:
        values_flattened.update(value)
    return values_flattened


def parallel_legal_implications(
    files: Sequence[Any],
    llm: Sequence[ChatOpenAI],
    summarization_kwargs: Optional[dict] = None,
) -> str:
    """Gradio-compatible wrapper around the concurrent extraction.

    Args:
        files: Uploaded file objects; only their ``.name`` attribute (the
            path on disk) is read, e.g. tempfile wrappers.
        llm: Indexable container whose first element is the language model
            to use; only ``llm[0]`` is read.
        summarization_kwargs: Overrides for the summarization settings,
            forwarded to ``generate_legal_implications_concurrently``.

    Returns:
        A text report with one section per returned file name.
    """
    now = time.time()
    values_flattened = asyncio.run(
        generate_legal_implications_concurrently(
            file_paths=[f.name for f in files],
            llm=llm[0],
            summarization_kwargs=summarization_kwargs,
        )
    )
    print("Time taken for complete legal implications: ", time.time() - now)

    # Build the report with a single join instead of repeated "+=".
    sections = [
        f"Rechtlich relevanten Fakten für {file_name.capitalize()}:\n\n{legal_implications}\n\n\n"
        for file_name, legal_implications in values_flattened.items()
    ]
    header = "Die folgenden rechtlich relevanten Fakten wurden gefunden:\n\n\n\n"
    return header + "".join(sections)