Spaces:

lenox-ai
/

prototype

Runtime error

App Files Files Community

fvde committed on Aug 24, 2023

Commit

c6d3d04

1 Parent(s): 2f12302

Upload folder using huggingface_hub

Browse files

Files changed (11) hide show

app.py +4 -10
src/__pycache__/gradio_app.cpython-39.pyc +0 -0
src/__pycache__/legal_implications.cpython-39.pyc +0 -0
src/__pycache__/llm_utils.cpython-39.pyc +0 -0
src/__pycache__/prompts.cpython-39.pyc +0 -0
src/__pycache__/summarization.cpython-39.pyc +0 -0
src/gradio_app.py +46 -9
src/legal_implications.py +86 -0
src/llm_utils.py +27 -1
src/prompts.py +36 -1
src/summarization.py +10 -18

app.py CHANGED Viewed

@@ -19,19 +19,13 @@ if __name__ == "__main__":
     args = parser.parse_args()
     # Default configuration for summarization
-    summarization_default_kwargs = dict(
-        chain_type="map_reduce",
-        map_prompt=prompts["short_de"]["map_prompt"],
-        combine_prompt=prompts["short_de"]["combine_prompt"],
-    )
     # Load configuration from a configuration file
     with open(args.configuration_file, "r") as cgf:
         cgf_kwargs = json.load(cgf)
-    summarization_kwargs = cgf_kwargs.get("summarization_kwargs", {})
-    # Update the configuration with the command line arguments
-    summarization_default_kwargs.update(summarization_kwargs)
     # Load the language model
     llm = load_open_ai_llm(
@@ -42,6 +36,6 @@ if __name__ == "__main__":
     run_summarization_model_gradio(
         llm=llm,
         share_gradio_via_link=cgf_kwargs.get("share_gradio_via_link", False),
-        summarization_kwargs=summarization_default_kwargs,
         run_local=cgf_kwargs.get("run_local", True),
     )

     args = parser.parse_args()
     # Default configuration for summarization
     # Load configuration from a configuration file
     with open(args.configuration_file, "r") as cgf:
         cgf_kwargs = json.load(cgf)
+    summarization_kwargs = cgf_kwargs.get(
+        "summarization_kwargs", {"chain_type": "map_reduce"}
+    )
     # Load the language model
     llm = load_open_ai_llm(
     run_summarization_model_gradio(
         llm=llm,
         share_gradio_via_link=cgf_kwargs.get("share_gradio_via_link", False),
+        summarization_kwargs=summarization_kwargs,
         run_local=cgf_kwargs.get("run_local", True),
     )

src/__pycache__/gradio_app.cpython-39.pyc CHANGED Viewed

Binary files a/src/__pycache__/gradio_app.cpython-39.pyc and b/src/__pycache__/gradio_app.cpython-39.pyc differ

src/__pycache__/legal_implications.cpython-39.pyc ADDED Viewed

Binary file (2.87 kB). View file

src/__pycache__/llm_utils.cpython-39.pyc CHANGED Viewed

Binary files a/src/__pycache__/llm_utils.cpython-39.pyc and b/src/__pycache__/llm_utils.cpython-39.pyc differ

src/__pycache__/prompts.cpython-39.pyc CHANGED Viewed

Binary files a/src/__pycache__/prompts.cpython-39.pyc and b/src/__pycache__/prompts.cpython-39.pyc differ

src/__pycache__/summarization.cpython-39.pyc CHANGED Viewed

Binary files a/src/__pycache__/summarization.cpython-39.pyc and b/src/__pycache__/summarization.cpython-39.pyc differ

src/gradio_app.py CHANGED Viewed

@@ -6,9 +6,9 @@ import gradio as gr
 from langchain.chat_models import ChatOpenAI
 from src.summarization import (
     parallel_summarization,
-    parallel_legal_implications,
     PARALLEL_SUMMARIZATION_MAPPING,
 )
 from src.mailing import send_email
@@ -89,7 +89,7 @@ def load_summary_section(llm: ChatOpenAI):
                 summary_parallel_button = gr.Button(
                     "Parallel Summary", interactive=False
                 )
-                clear = gr.Button("Clear All Components")
             with gr.Column(scale=2):
                 sections_to_select = [
                     i for i in PARALLEL_SUMMARIZATION_MAPPING.keys() if "I." not in i
@@ -156,12 +156,12 @@ def load_summary_section(llm: ChatOpenAI):
         ).then(
             switch_buttons,
             [gr.State(True)],
-            [summary_parallel_button, gr.State(None), gr.State(None)],
             queue=False,
         ).then
         # The clear button clears the dashboard
-        clear.click(lambda: None, None, summary_output, queue=False).then(
             lambda: None, None, file_upload_summary, queue=False
         ).then(lambda: None, None, summary_show_pdf, queue=False).then(
             lambda: None, None, send_email_button, queue=False
@@ -185,11 +185,12 @@ def load_summary_section(llm: ChatOpenAI):
     return summary_section
-def load_legal_implications_section(llm: ChatOpenAI):
     """Load the legal implications section
     Args:
         llm (ChatOpenAI): Language model.
     Returns:
         gr.Block: Legal Implications Section
@@ -227,7 +228,9 @@ def load_legal_implications_section(llm: ChatOpenAI):
                 subject_email_legal_implications = gr.Textbox(
                     label="Subject", placeholder="Enter Subject"
                 )
-                send_email_button = gr.Button("Open Email", interactive=False)
             with gr.Column(scale=3):
                 email_instructions_legal_implications = gr.Textbox(
                     label="Email Instructions",
@@ -260,13 +263,45 @@ def load_legal_implications_section(llm: ChatOpenAI):
             queue=False,
         ).then(
             parallel_legal_implications,
-            [file_upload_legal_implications, gr.State([llm])],
             [legal_implications_output],
             queue=False,
         ).then(
             switch_buttons,
             [gr.State(True)],
-            [extract_legal_implications_button, gr.State(None), gr.State(None)],
             queue=False,
         )
@@ -298,7 +333,9 @@ def run_summarization_model_gradio(
         with gr.Tab("Summarize Verdict"):
             load_summary_section(llm=llm)
         with gr.Tab("Legal Implications"):
-            load_legal_implications_section(llm=llm)
     webui.queue()

 from langchain.chat_models import ChatOpenAI
 from src.summarization import (
     parallel_summarization,
     PARALLEL_SUMMARIZATION_MAPPING,
 )
+from src.legal_implications import parallel_legal_implications
 from src.mailing import send_email
                 summary_parallel_button = gr.Button(
                     "Parallel Summary", interactive=False
                 )
+                clear_button = gr.Button("Clear All Components")
             with gr.Column(scale=2):
                 sections_to_select = [
                     i for i in PARALLEL_SUMMARIZATION_MAPPING.keys() if "I." not in i
         ).then(
             switch_buttons,
             [gr.State(True)],
+            [summary_parallel_button, send_email_button, gr.State(None)],
             queue=False,
         ).then
         # The clear button clears the dashboard
+        clear_button.click(lambda: None, None, summary_output, queue=False).then(
             lambda: None, None, file_upload_summary, queue=False
         ).then(lambda: None, None, summary_show_pdf, queue=False).then(
             lambda: None, None, send_email_button, queue=False
     return summary_section
+def load_legal_implications_section(llm: ChatOpenAI, summarization_kwargs: dict = {}):
     """Load the legal implications section
     Args:
         llm (ChatOpenAI): Language model.
+        summarization_kwargs (dict, optional): Keyword arguments for the summarization. Defaults to {}.
     Returns:
         gr.Block: Legal Implications Section
                 subject_email_legal_implications = gr.Textbox(
                     label="Subject", placeholder="Enter Subject"
                 )
+                send_email_button_legal_implications = gr.Button(
+                    "Open Email", interactive=False
+                )
             with gr.Column(scale=3):
                 email_instructions_legal_implications = gr.Textbox(
                     label="Email Instructions",
             queue=False,
         ).then(
             parallel_legal_implications,
+            [
+                file_upload_legal_implications,
+                gr.State([llm]),
+                gr.State(summarization_kwargs),
+            ],
             [legal_implications_output],
             queue=False,
         ).then(
             switch_buttons,
             [gr.State(True)],
+            [
+                extract_legal_implications_button,
+                send_email_button_legal_implications,
+                gr.State(None),
+            ],
+            queue=False,
+        )
+        # The clear button clears the dashboard
+        clear_legal_implications_button.click(
+            lambda: None, None, legal_implications_output, queue=False
+        ).then(lambda: None, None, file_upload_legal_implications, queue=False).then(
+            lambda: None, None, legal_implications_show_pdf, queue=False
+        ).then(
+            lambda: None, None, send_email_button_legal_implications, queue=False
+        ).then(
+            lambda: None, None, email_instructions_legal_implications, queue=False
+        ).then(
+            lambda: None, None, recipiant_email_legal_implications, queue=False
+        )
+        # Email button click opens the default email client and fills in the email instructions
+        send_email_button_legal_implications.click(
+            send_email,
+            [
+                legal_implications_output,
+                recipiant_email_legal_implications,
+                subject_email_legal_implications,
+                email_instructions_legal_implications,
+            ],
             queue=False,
         )
         with gr.Tab("Summarize Verdict"):
             load_summary_section(llm=llm)
         with gr.Tab("Legal Implications"):
+            load_legal_implications_section(
+                llm=llm, summarization_kwargs=summarization_kwargs
+            )
     webui.queue()

src/legal_implications.py ADDED Viewed

	@@ -0,0 +1,86 @@

+from langchain.chat_models import ChatOpenAI
+from src.prompts import (
+    prompts_parallel_legal_implications,
+)
+from src.doc_loading import load_docs
+from src.llm_utils import async_generate_summary_chain
+import time
+from typing import List
+import asyncio
+async def generate_legal_implications_concurrently(
+    file_paths: List[str],
+    llm: ChatOpenAI,
+    summarization_kwargs: dict = None,
+) -> dict:
+    """Extract legal implications from several documents concurrently.
+    One summarize chain is created per file and all chains are awaited together.
+    Args:
+        file_paths (List[str]): Paths of the documents to process.
+        llm (ChatOpenAI): Language model to use for the legal implications.
+        summarization_kwargs (dict, optional): Keyword arguments forwarded to the summarize chain.
+            They override the defaults built here (map_reduce chain type and the
+            legal-implications map/combine prompts). Defaults to None.
+    Returns:
+        dict: Mapping of file name (last path component) to the generated legal implications.
+    """
+    # Local import so this function does not rely on a module-level `import os`.
+    import os
+    # chain_type belongs to the defaults: the map/combine prompts below are only
+    # meaningful for the map_reduce chain, so a caller passing {} must still get it.
+    chain_kwargs = dict(
+        chain_type="map_reduce",
+        map_prompt=prompts_parallel_legal_implications["map_prompt"],
+        combine_prompt=prompts_parallel_legal_implications["combine_prompt"],
+    )
+    # None replaces the former mutable dict default; caller values win over defaults.
+    chain_kwargs.update(summarization_kwargs or {})
+    # create parallel tasks
+    tasks = []
+    for file_path in file_paths:
+        docs = load_docs(file_path=file_path, with_pageinfo=False)
+        tasks.append(
+            async_generate_summary_chain(
+                llm=llm,
+                docs=docs,
+                summarization_kwargs=chain_kwargs,
+                # basename handles the platform's path separator, unlike split("/")
+                k=os.path.basename(file_path),
+            )
+        )
+        print(f"Appending task for legal implications: {file_path}")
+    print("-------------------")
+    # execute all coroutines concurrently
+    values = await asyncio.gather(*tasks)
+    # Merge the single-entry dicts returned by the tasks.
+    # NOTE(review): two files with the same base name overwrite each other here.
+    values_flattened = {}
+    for v in values:
+        values_flattened.update(v)
+    return values_flattened
+def parallel_legal_implications(
+    files: list, llm: List[ChatOpenAI], summarization_kwargs: dict = None
+) -> str:
+    """Gradio-compatible wrapper around the concurrent legal implication extraction.
+    Args:
+        files (list): Uploaded file objects; only their ``name`` attribute (the file path) is read.
+        llm (List[ChatOpenAI]): Language model wrapped in a list; the first element is used.
+        summarization_kwargs (dict, optional): Keyword arguments for the summarization.
+            ``chain_type`` falls back to "map_reduce" when not given. Defaults to None.
+    Returns:
+        str: Legal implications of all files, one section per file.
+    """
+    now = time.time()
+    # Always hand over a fresh dict that contains a chain_type: passing an empty
+    # dict would override the callee's own map_reduce default.
+    chain_kwargs = {"chain_type": "map_reduce"}
+    chain_kwargs.update(summarization_kwargs or {})
+    values_flattened = asyncio.run(
+        generate_legal_implications_concurrently(
+            file_paths=[f.name for f in files],
+            llm=llm[0],
+            summarization_kwargs=chain_kwargs,
+        )
+    )
+    print("Time taken for complete legal implications: ", time.time() - now)
+    header = "Die folgenden rechtlich relevanten Fakten wurden gefunden:\n\n\n\n"
+    # Build all sections first and join once instead of repeated string +=.
+    sections = [
+        f"Rechtlich relevanten Fakten für {file_name.capitalize()}:\n\n{legal_implications}\n\n\n"
+        for file_name, legal_implications in values_flattened.items()
+    ]
+    return header + "".join(sections)

src/llm_utils.py CHANGED Viewed

@@ -3,9 +3,10 @@ from langchain.chat_models import ChatOpenAI
 from langchain.docstore.document import Document
 import time
 from typing import List
-async def async_generate(
     llm: ChatOpenAI, docs: List[Document], llm_kwargs: dict, k: str
 ) -> dict:
     """Asyncronous LLMChain function.
@@ -26,3 +27,28 @@ async def async_generate(
     resp = await chain.arun(text=docs)
     print(f"Time taken for {k}: ", time.time() - now)
     return {k: resp}

 from langchain.docstore.document import Document
 import time
 from typing import List
+from langchain.chains.summarize import load_summarize_chain
+async def async_generate_llmchain(
     llm: ChatOpenAI, docs: List[Document], llm_kwargs: dict, k: str
 ) -> dict:
     """Asyncronous LLMChain function.
     resp = await chain.arun(text=docs)
     print(f"Time taken for {k}: ", time.time() - now)
     return {k: resp}
+async def async_generate_summary_chain(
+    llm: ChatOpenAI, docs: List[Document], summarization_kwargs: dict, k: str
+) -> dict:
+    """Run a summarize chain over the given documents asynchronously.
+    Args:
+        llm (ChatOpenAI): Language model to use.
+        docs (List[Document]): Documents handed to the chain.
+        summarization_kwargs (dict): Keyword arguments passed on to load_summarize_chain.
+        k (str): Key under which the chain output is returned.
+    Returns:
+        dict: Single-entry dictionary mapping ``k`` to the chain output.
+    """
+    print(f"Starting summarization for {k}")
+    started_at = time.time()
+    summarize_chain = load_summarize_chain(llm=llm, **summarization_kwargs)
+    # Await the chain before reporting the elapsed time.
+    result = {k: await summarize_chain.arun(docs)}
+    print(f"Time taken for {k}: ", time.time() - started_at)
+    return result

src/prompts.py CHANGED Viewed

@@ -176,7 +176,7 @@ def get_template_parallel(name: str, headline: str, additional_text: str = ""):
     )
-prompts_parallel = {
     "intro": PromptTemplate(
         input_variables=["text"],
         template=get_template_parallel(name="Einleitung", headline="I. Einleitung"),
@@ -268,3 +268,38 @@ prompts_parallel = {
         ),
     ),
 }

     )
+prompts_parallel_summary = {
     "intro": PromptTemplate(
         input_variables=["text"],
         template=get_template_parallel(name="Einleitung", headline="I. Einleitung"),
         ),
     ),
 }
+# Prompt templates for the legal-implications extraction. Both entries take a
+# single input variable, "text"; src/legal_implications.py passes them to the
+# summarize chain as `map_prompt` and `combine_prompt`.
+# NOTE(review): the German prompt text announces "dreifache Anführungszeichen"
+# (triple quotation marks) while the delimiter actually used is triple backticks,
+# and the combine prompt contains typos ("rechlich relvanten"). These are runtime
+# strings sent to the model, so they are left untouched here - confirm before editing.
+prompts_parallel_legal_implications = {
+    # Extraction prompt: asks for short bullet points of legally relevant information.
+    "map_prompt": PromptTemplate(
+        input_variables=["text"],
+        template="""
+Der folgende Kontext wird durch dreifache Anführungszeichen begrenzt.
+Kontext:
+```{text}```
+Ein Anwalt berät einen Mandanten und möchte wissen, welche rechtlichen Implikationen der Kontext für sein Unternehmen hat.
+Extrahiere alle wichtigen rechtlich relevanten Informationen.
+Die rechtlich relevanten Informationen müssen zu 100% korrekt sein und schreib nur kurze präzise Stichpunkte!
+Wenn keine rechtlich relevanten Informationen vorhanden sind, schreib: 'Keine rechtlich relevanten Informationen vorhanden.'
+Rechtlich relevante Informationen als Stichpunkte:
+    """,
+    ),
+    # Combine prompt: turns the bullet points into a text with one short
+    # explanatory paragraph per legally relevant fact.
+    "combine_prompt": PromptTemplate(
+        input_variables=["text"],
+        template="""
+Die folgenden rechlich relvanten Fakten als Stichpunkte sind durch dreifache Anführungszeichen begrenzt.
+Rechlich relvanten Fakten:
+```{text}```
+Schreibe einen rechtlich korrekten Text, der die rechlich relvanten Fakten auflistet.
+Schreibe zu jedem rechtlich relevantem Fakt einen kurzen Paragraphen mit sehr wenig Sätzen, der erklärt warum dies rechtlich relevant ist.
+Der Text muss zu 100% korrekt sein!
+Rechtliche relevante Fakten mit kurzen Erklärungen:
+""",
+    ),
+}

src/summarization.py CHANGED Viewed

@@ -1,8 +1,11 @@
 from langchain.chains.summarize import load_summarize_chain
 from langchain.chat_models import ChatOpenAI
-from src.prompts import prompts, prompts_parallel
 from src.doc_loading import load_docs
-from src.llm_utils import async_generate
 import time
 from typing import Dict, List
 import asyncio
@@ -95,9 +98,11 @@ async def generate_summary_concurrently(
     for k in PARALLEL_SUMMARIZATION_ORDER:
         if PARALLEL_SUMMARIZATION_MAPPING_INVERSE.get(k, k) in sections:
             sk = summarization_kwargs.copy()
-            sk["prompt"] = prompts_parallel[k]
-            print(f"Appending task for {k}")
-            tasks.append(async_generate(llm=llm, docs=docs, llm_kwargs=sk, k=k))
     print("-------------------")
     # execute all coroutines concurrently
     values = await asyncio.gather(*tasks)
@@ -173,16 +178,3 @@ def parallel_summarization(file: str, sections: List[str], llm: ChatOpenAI) -> s
         )
     return output
-def parallel_legal_implications(file: str, llm: ChatOpenAI) -> str:
-    """Wrapper for the parallel legal implication extraction function to make it compatible with gradio.
-    Args:
-        file (str): Path to the file. This can either be a local path or a tempfile.TemporaryFileWrapper_.
-        llm (ChatOpenAI): Language model.
-    Returns:
-        str: Legal Implications of the file.
-    """
-    return "TBD"

 from langchain.chains.summarize import load_summarize_chain
 from langchain.chat_models import ChatOpenAI
+from src.prompts import (
+    prompts,
+    prompts_parallel_summary,
+)
 from src.doc_loading import load_docs
+from src.llm_utils import async_generate_llmchain
 import time
 from typing import Dict, List
 import asyncio
     for k in PARALLEL_SUMMARIZATION_ORDER:
         if PARALLEL_SUMMARIZATION_MAPPING_INVERSE.get(k, k) in sections:
             sk = summarization_kwargs.copy()
+            sk["prompt"] = prompts_parallel_summary[k]
+            print(f"Appending task for summary: {k}")
+            tasks.append(
+                async_generate_llmchain(llm=llm, docs=docs, llm_kwargs=sk, k=k)
+            )
     print("-------------------")
     # execute all coroutines concurrently
     values = await asyncio.gather(*tasks)
         )
     return output