File size: 3,014 Bytes
c6d3d04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import asyncio
import time
from typing import List, Optional

from langchain.chat_models import ChatOpenAI

from src.doc_loading import load_docs
from src.llm_utils import async_generate_summary_chain
from src.prompts import (
    prompts_parallel_legal_implications,
)


async def generate_legal_implications_concurrently(
    file_paths: List[str],
    llm: ChatOpenAI,
    summarization_kwargs: Optional[dict] = None,
) -> dict:
    """Extract legal implications from several documents concurrently.

    One summary chain is scheduled per file path and all chains are awaited
    together with ``asyncio.gather``, so the LLM calls overlap instead of
    running sequentially.

    Args:
        file_paths (List[str]): List of file paths. Each can be a local path
            or the ``name`` of a ``tempfile.NamedTemporaryFile``-like object.
        llm (ChatOpenAI): Language model to use for the legal implications.
        summarization_kwargs (Optional[dict]): Overrides for the summarization
            chain. Defaults to ``{"chain_type": "map_reduce"}`` when omitted.
            (A ``None`` default avoids the shared-mutable-default pitfall.)

    Returns:
        dict: Mapping of file base name -> extracted legal implications,
        flattened across all input files.
    """
    if summarization_kwargs is None:
        summarization_kwargs = {"chain_type": "map_reduce"}

    # Prompt defaults for this task; caller-supplied kwargs take precedence.
    default_summarization_kwargs = dict(
        map_prompt=prompts_parallel_legal_implications["map_prompt"],
        combine_prompt=prompts_parallel_legal_implications["combine_prompt"],
    )
    default_summarization_kwargs.update(summarization_kwargs)

    # Build one coroutine per document; nothing runs until gather() below.
    tasks = []
    for file_path in file_paths:
        docs = load_docs(file_path=file_path, with_pageinfo=False)

        tasks.append(
            async_generate_summary_chain(
                llm=llm,
                docs=docs,
                summarization_kwargs=default_summarization_kwargs,
                # Key each result by the file's base name (path after the
                # last "/"); assumes POSIX-style paths — TODO confirm on
                # Windows callers.
                k=file_path.split("/")[-1],
            )
        )
        print(f"Appending task for legal implications: {file_path}")

    print("-------------------")
    # Execute all coroutines concurrently.
    values = await asyncio.gather(*tasks)

    # Each task returns a single-entry dict; merge them into one mapping.
    values_flattened = {}
    for v in values:
        values_flattened.update(v)
    return values_flattened


def parallel_legal_implications(
    files: list, llm: List[ChatOpenAI], summarization_kwargs: Optional[dict] = None
) -> str:
    """Wrapper for the parallel legal implication extraction function to make it compatible with gradio.

    Args:
        files (list): List of file-like objects (e.g. gradio uploads); only
            the ``.name`` attribute of each is used as the file path.
        llm (List[ChatOpenAI]): Sequence of language models; only the first
            element is used. (Kept as a sequence because gradio passes state
            this way — the original ``ChatOpenAI`` annotation was wrong.)
        summarization_kwargs (Optional[dict]): Keyword arguments for the
            summarization. Defaults to ``{}`` when omitted; a ``None``
            default avoids the shared-mutable-default pitfall.

    Returns:
        str: Legal implications of the files, formatted for display
        (German headings, one section per file).
    """
    if summarization_kwargs is None:
        summarization_kwargs = {}

    now = time.time()
    values_flattened = asyncio.run(
        generate_legal_implications_concurrently(
            file_paths=[f.name for f in files],
            llm=llm[0],
            summarization_kwargs=summarization_kwargs,
        )
    )
    print("Time taken for complete legal implications: ", time.time() - now)

    # Assemble the user-facing (German) report, one section per file.
    output = "Die folgenden rechtlich relevanten Fakten wurden gefunden:\n\n\n\n"
    for file_name, legal_implications in values_flattened.items():
        output += f"Rechtlich relevanten Fakten für {file_name.capitalize()}:\n\n{legal_implications}\n\n\n"

    return output