# Spaces: Build error
# File size: 3,489 Bytes
# Revisions: 98b92ba, dbfe535, 75de07a
import os
import csv
import PyPDF2
import gradio as gr
from transformers import pipeline
# Define the parameters and clauses to extract.
# Each entry is interpolated into the question "What is the <entry>?" in
# process_contract and is also used as the row label for the extracted answer.
KEY_PARAMETERS = [
    "Contract Start Date",
    "Contract End Date",
    "Payment Terms",
    "Renewal Terms",
    "Liability Clause",
    "Termination Clause",
    "Confidentiality Clause",
    "Indemnification Clause",
    "Governing Law",
    "Jurisdiction"
]
# Read a PDF from disk and return the text of all of its pages
def load_contract(file_path):
    """Return the concatenated text of every page of the PDF at *file_path*.

    The file is opened in binary mode and parsed with ``PyPDF2.PdfReader``.
    Page texts are joined in page order with no separator between pages.
    """
    with open(file_path, "rb") as pdf_file:
        pdf = PyPDF2.PdfReader(pdf_file)
        # Extract while the file is still open: pages are read from the handle.
        page_texts = [page.extract_text() for page in pdf.pages]
    return "".join(page_texts)
# Initialize Hugging Face pipelines.
# These are created once at import time and shared by the helpers below:
#   qa_pipeline            - question answering, used by extract_parameters
#   summarization_pipeline - summarization, used by summarize_contract
#   translation_pipeline   - English-to-French translation, used by translate_contract
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
summarization_pipeline = pipeline("summarization", model="facebook/bart-large-cnn")
translation_pipeline = pipeline("translation_en_to_fr", model="Helsinki-NLP/opus-mt-en-fr")
def extract_parameters(context, question):
    """Answer *question* about *context* using the question-answering pipeline.

    Args:
        context: Text to search, e.g. the full contract text.
        question: Natural-language question to ask about the text.

    Returns:
        The extracted answer with surrounding whitespace removed, or
        ``"Not found"`` when the context is empty/blank or the model
        produces no answer text.
    """
    # A PDF without a text layer yields an empty context, which the QA
    # pipeline cannot process; report "Not found" instead of failing the run.
    if not context or not context.strip():
        return "Not found"
    result = qa_pipeline(question=question, context=context)
    # The pipeline always includes an "answer" key, so a missing key alone
    # never triggers the fallback; treat a blank answer as "Not found" too.
    answer = (result.get("answer") or "").strip()
    return answer or "Not found"
def summarize_contract(context):
    """Summarize *context* with the summarization pipeline.

    Args:
        context: Text to summarize, e.g. the full contract text.

    Returns:
        The generated summary (between 50 and 200 tokens, greedy decoding),
        or an empty string when *context* is empty or blank.

    Note:
        Inputs longer than the model's maximum input length are truncated,
        so for a long contract only the leading portion is summarized.
    """
    if not context or not context.strip():
        return ""
    summaries = summarization_pipeline(
        context,
        max_length=200,
        min_length=50,
        do_sample=False,
        # Without truncation, a contract longer than the model's input
        # limit makes the pipeline fail instead of producing a summary.
        truncation=True,
    )
    return summaries[0]['summary_text']
def translate_contract(context):
    """Translate English *context* into French with the translation pipeline.

    Args:
        context: English text to translate.

    Returns:
        The French translation, or an empty string when *context* is empty
        or blank. Text beyond the model's maximum input length is truncated
        before translation.
    """
    if not context or not context.strip():
        return ""
    # truncation=True keeps over-long inputs from failing inside the model.
    translations = translation_pipeline(context, truncation=True)
    return translations[0]['translation_text']
# Persist the extracted results as a two-column CSV file
def save_to_csv(file_name, data):
    """Write *data* to ``output/<file_name>`` as CSV and return that path.

    The file starts with a ``Parameter,Value`` header row followed by one row
    per ``key, value`` pair of *data*, in the mapping's iteration order. The
    ``output`` directory is created if it does not exist yet.
    """
    out_dir = "output"
    os.makedirs(out_dir, exist_ok=True)
    destination = os.path.join(out_dir, file_name)
    with open(destination, mode="w", newline="", encoding="utf-8") as handle:
        rows = csv.writer(handle)
        rows.writerow(["Parameter", "Value"])
        rows.writerows(data.items())
    return destination
def process_contract(file_path):
    """Run the full analysis for one contract PDF.

    Loads the PDF text, asks one question per entry of ``KEY_PARAMETERS``,
    summarizes the contract, translates the summary into French and writes
    everything to a CSV file named ``results_<pdf stem>.csv``.

    Args:
        file_path: Path of the contract PDF to process.

    Returns:
        A ``(results, output_path)`` tuple: ``results`` maps each parameter
        name (plus ``"Contract Summary"`` and ``"Contract Summary (French)"``)
        to its text, and ``output_path`` is the path of the CSV written.
    """
    # Load and process the contract
    full_context = load_contract(file_path)

    # Extract parameters
    results = {
        param: extract_parameters(full_context, f"What is the {param}?")
        for param in KEY_PARAMETERS
    }

    # Summarize the contract
    summary = summarize_contract(full_context)
    results["Contract Summary"] = summary

    # Translate the contract summary to French
    results["Contract Summary (French)"] = translate_contract(summary)

    # Save results to CSV and report where the file actually landed:
    # save_to_csv places it under its own output directory, so the bare file
    # name alone would point at a location where no file exists.
    stem = os.path.splitext(os.path.basename(file_path))[0]
    output_path = save_to_csv(f"results_{stem}.csv", results)
    return results, output_path
def interface(file):
    """Gradio callback: process the uploaded contract and format the results.

    Args:
        file: The uploaded file as delivered by the ``gr.File`` input; either
            an object exposing the temp-file path as ``.name`` or a plain
            filepath string. Falsy when nothing was uploaded.

    Returns:
        A ``(text, csv_path)`` tuple: ``text`` has one ``"key: value"`` line
        per result, ``csv_path`` is the second value returned by
        ``process_contract``. Returns ``("No file provided", None)`` when no
        file was given.
    """
    if not file:
        return "No file provided", None
    # Accept both upload objects with a ``.name`` path and plain path strings.
    file_path = getattr(file, "name", file)
    results, output_file = process_contract(file_path)
    display_results = "\n".join(f"{key}: {value}" for key, value in results.items())
    return display_results, output_file
# Build the Gradio UI: one upload field, two read-only text outputs and a
# button that runs `interface` on the uploaded file.
# NOTE(review): the original indentation was lost; the three components below
# are assumed to sit inside the Row — confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Contract Intelligence")
    gr.Markdown("Upload a contract PDF to extract key parameters, generate a summary, and translate the summary into French.")

    with gr.Row():
        file_input = gr.File(label="Upload Contract PDF", file_types=[".pdf"])
        result_output = gr.Textbox(label="Extracted Information")
        download_link = gr.Textbox(label="Download CSV File Path")

    process_button = gr.Button("Process Contract")
    process_button.click(
        fn=interface,
        inputs=file_input,
        outputs=[result_output, download_link],
    )

# Start the web server only when run as a script; listens on all interfaces.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=8080)
|