File size: 3,489 Bytes
98b92ba
 
dbfe535
75de07a
98b92ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dbfe535
98b92ba
dbfe535
 
 
 
 
 
 
 
98b92ba
dbfe535
 
98b92ba
 
 
 
 
dbfe535
 
 
 
 
 
98b92ba
 
 
 
 
 
 
 
 
 
 
 
75de07a
98b92ba
dbfe535
98b92ba
 
 
 
 
 
dbfe535
 
 
 
 
 
 
 
98b92ba
75de07a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import os
import csv
import PyPDF2
import gradio as gr
from transformers import pipeline

# Define the parameters and clauses to extract.
# Each entry is interpolated into a "What is the {param}?" question by
# process_contract() and is also used as the key under which the answer is
# stored in the results dict (and therefore as the CSV "Parameter" value).
KEY_PARAMETERS = [
    "Contract Start Date",
    "Contract End Date",
    "Payment Terms",
    "Renewal Terms",
    "Liability Clause",
    "Termination Clause",
    "Confidentiality Clause",
    "Indemnification Clause",
    "Governing Law",
    "Jurisdiction"
]

# Load PDF file and extract text
def load_contract(file_path):
    """Return the text of every page of a PDF file, concatenated in page order.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        A single string made of each page's extracted text, with no separator
        inserted between pages. Pages that yield no text contribute nothing,
        so a PDF without any extractable text produces ``""``.
    """
    with open(file_path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        # Treat a falsy extraction result as empty text so that a page with
        # nothing extractable cannot break the join.
        # NOTE(review): whether extract_text() can return None depends on the
        # installed PyPDF2 version - the guard is harmless either way.
        return "".join(page.extract_text() or "" for page in reader.pages)

# Initialize Hugging Face pipelines.
# These calls execute at import time, so all three pipelines are constructed
# as soon as this module is loaded, before the UI below is built.
# Question answering: called by extract_parameters().
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
# Summarization: called by summarize_contract().
summarization_pipeline = pipeline("summarization", model="facebook/bart-large-cnn")
# English-to-French translation: called by translate_contract().
translation_pipeline = pipeline("translation_en_to_fr", model="Helsinki-NLP/opus-mt-en-fr")

def extract_parameters(context, question):
    """Answer ``question`` from ``context`` using the question-answering pipeline.

    Args:
        context: Text to search for the answer (the contract text).
        question: Natural-language question to ask about the context.

    Returns:
        The ``"answer"`` string reported by the pipeline, or ``"Not found"``
        when the context is empty/whitespace-only or the pipeline reports no
        (or an empty) answer.
    """
    # The question-answering pipeline rejects an empty context with a
    # ValueError; a PDF without extractable text yields exactly that, so
    # short-circuit instead of crashing the whole request.
    if not context or not context.strip():
        return "Not found"

    result = qa_pipeline({"context": context, "question": question})
    # `or` (rather than a .get default) also maps an empty answer string to
    # the sentinel.
    return result.get("answer") or "Not found"

def summarize_contract(context):
    """Summarize contract text with the summarization pipeline.

    Args:
        context: Text to summarize.

    Returns:
        The ``summary_text`` of the first pipeline result, generated with
        ``min_length=50``, ``max_length=200`` and sampling disabled, or ``""``
        when ``context`` is empty or whitespace-only.
    """
    # Nothing to summarize (e.g. a PDF with no extractable text).
    if not context or not context.strip():
        return ""

    # truncation=True clips the input to the model's maximum input length.
    # Without it, a contract longer than that limit makes the model fail.
    # The trade-off is that only the leading part of a long contract is
    # reflected in the summary.
    summaries = summarization_pipeline(
        context,
        max_length=200,
        min_length=50,
        do_sample=False,
        truncation=True,
    )
    return summaries[0]["summary_text"]

def translate_contract(context):
    """Translate English text to French with the translation pipeline.

    Args:
        context: English text to translate.

    Returns:
        The ``translation_text`` of the first pipeline result, or ``""`` when
        ``context`` is empty or whitespace-only.
    """
    # Skip the model entirely when there is nothing to translate, rather than
    # asking it to generate output from an empty input.
    if not context or not context.strip():
        return ""

    translations = translation_pipeline(context)
    return translations[0]["translation_text"]

# Save results to CSV
def save_to_csv(file_name, data, *, output_dir="output"):
    """Write a mapping to a two-column CSV file and return the file's path.

    The file has a ``Parameter,Value`` header followed by one row per item of
    ``data``, in the mapping's iteration order. An existing file with the same
    name is overwritten.

    Args:
        file_name: Name of the CSV file, relative to ``output_dir``.
        data: Mapping of parameter name to value.
        output_dir: Directory to write into; created if it does not exist.
            Defaults to ``"output"`` (relative to the current working
            directory).

    Returns:
        The path of the written file (``output_dir`` joined with
        ``file_name``).
    """
    file_path = os.path.join(output_dir, file_name)
    # Create the file's actual parent directory so that a file_name containing
    # sub-directories works too; `or "."` covers a path with no directory part.
    os.makedirs(os.path.dirname(file_path) or ".", exist_ok=True)
    # newline="" lets the csv module control line endings itself.
    with open(file_path, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=["Parameter", "Value"])
        writer.writeheader()
        writer.writerows(
            {"Parameter": key, "Value": value} for key, value in data.items()
        )
    return file_path

def process_contract(file_path):
    """Run the full extraction pipeline on one contract PDF.

    Steps: read the PDF text, ask one "What is the <parameter>?" question per
    entry of ``KEY_PARAMETERS``, summarize the text, translate the summary to
    French, and write everything to a CSV file named
    ``results_<pdf name without extension>.csv``.

    Args:
        file_path: Path of the contract PDF.

    Returns:
        A ``(results, output_path)`` tuple: ``results`` maps each key parameter
        plus ``"Contract Summary"`` and ``"Contract Summary (French)"`` to its
        extracted text; ``output_path`` is the path of the CSV file as
        returned by ``save_to_csv``.
    """
    # Load the contract text once; every step below works on this string.
    full_context = load_contract(file_path)

    # One question-answering call per parameter.
    results = {
        param: extract_parameters(full_context, f"What is the {param}?")
        for param in KEY_PARAMETERS
    }

    # Summarize the contract, then translate the summary (not the full text).
    summary = summarize_contract(full_context)
    results["Contract Summary"] = summary
    results["Contract Summary (French)"] = translate_contract(summary)

    # Return the path save_to_csv actually wrote to. The bare file name alone
    # does not locate the file, because it is written inside the output
    # directory.
    stem = os.path.splitext(os.path.basename(file_path))[0]
    output_path = save_to_csv(f"results_{stem}.csv", results)

    return results, output_path

def interface(file):
    """Gradio click handler: process the uploaded contract and format the output.

    Args:
        file: Value delivered by the file-upload component. Either a path
            (``str`` / ``os.PathLike``) or an object exposing the path as
            ``.name``; falsy when nothing was uploaded.

    Returns:
        A ``(text, csv_reference)`` tuple. ``text`` holds one ``key: value``
        line per extracted item and ``csv_reference`` is the second value
        returned by ``process_contract``. When no file was provided the result
        is ``("No file provided", None)``.
    """
    if not file:
        return "No file provided", None

    # Depending on the Gradio version/configuration the upload arrives either
    # as a plain path or as a tempfile-like wrapper whose `.name` is the path.
    # Accept both instead of assuming `.name` exists.
    file_path = file if isinstance(file, (str, os.PathLike)) else file.name

    results, output_file = process_contract(file_path)

    display_results = "\n".join(f"{key}: {value}" for key, value in results.items())
    return display_results, output_file

# Create Gradio Interface
# Layout: a title and description, one row holding the upload widget and the
# two output textboxes, and a button that triggers processing.
with gr.Blocks() as demo:
    gr.Markdown("# Contract Intelligence")
    gr.Markdown("Upload a contract PDF to extract key parameters, generate a summary, and translate the summary into French.")

    with gr.Row():
        # file_types limits the upload widget to .pdf files.
        file_input = gr.File(label="Upload Contract PDF", file_types=[".pdf"])
        # Receives the first value returned by interface() (the result text).
        result_output = gr.Textbox(label="Extracted Information")
        # Receives the second value returned by interface(), shown as plain
        # text rather than as a downloadable file.
        download_link = gr.Textbox(label="Download CSV File Path")

    process_button = gr.Button("Process Contract")
    # On click, call interface() with the uploaded file and route its two
    # return values to the two textboxes, in order.
    process_button.click(interface, inputs=file_input, outputs=[result_output, download_link])

# Start the web server only when executed as a script (not on import).
# The server is bound to address 0.0.0.0 on port 8080.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=8080)