"""Contract intelligence demo.

Extracts key parameters from a contract PDF with a question-answering model,
summarizes the contract, translates the summary into French, writes the
results to a CSV file, and exposes the workflow through a Gradio UI.
"""

import csv
import os

import gradio as gr
import PyPDF2
from transformers import pipeline

# Parameters and clauses to extract; each is turned into a
# "What is the <parameter>?" question for the QA model.
KEY_PARAMETERS = [
    "Contract Start Date",
    "Contract End Date",
    "Payment Terms",
    "Renewal Terms",
    "Liability Clause",
    "Termination Clause",
    "Confidentiality Clause",
    "Indemnification Clause",
    "Governing Law",
    "Jurisdiction",
]


def load_contract(file_path):
    """Return the text of every page of the PDF at *file_path*.

    Pages are joined with a newline so that the last word of one page is not
    fused with the first word of the next. Pages for which no text could be
    extracted contribute an empty string.
    """
    with open(file_path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        # `or ""` guards against a falsy result (None / empty) for pages
        # without extractable text, e.g. scanned images.
        page_texts = [page.extract_text() or "" for page in reader.pages]
    return "\n".join(page_texts)


# Initialize Hugging Face pipelines (models are loaded at import time).
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
summarization_pipeline = pipeline("summarization", model="facebook/bart-large-cnn")
translation_pipeline = pipeline(
    "translation_en_to_fr", model="Helsinki-NLP/opus-mt-en-fr"
)


def extract_parameters(context, question):
    """Answer *question* from *context* using the QA pipeline.

    Returns the answer text, or "Not found" when the context is empty or the
    model produces no answer.
    """
    # The QA pipeline rejects an empty context, so short-circuit here.
    if not context or not context.strip():
        return "Not found"
    result = qa_pipeline(question=question, context=context)
    return result.get("answer") or "Not found"


def summarize_contract(context):
    """Return a summary of *context*, or "" when there is nothing to summarize."""
    if not context or not context.strip():
        return ""
    # truncation=True clips inputs that exceed the model's maximum sequence
    # length instead of failing on long contracts.
    # NOTE(review): only the leading part of a long contract is summarized;
    # chunked summarization would be needed to cover the whole document.
    outputs = summarization_pipeline(
        context,
        max_length=200,
        min_length=50,
        do_sample=False,
        truncation=True,
    )
    return outputs[0]["summary_text"]


def translate_contract(context):
    """Return the French translation of *context*, or "" for empty input."""
    if not context or not context.strip():
        return ""
    # truncation=True keeps over-long inputs within the model's limit.
    outputs = translation_pipeline(context, truncation=True)
    return outputs[0]["translation_text"]


def save_to_csv(file_name, data):
    """Write *data* as Parameter/Value rows to ``output/<file_name>``.

    The ``output`` directory is created if it does not exist. Returns the
    path of the written file.
    """
    os.makedirs("output", exist_ok=True)
    file_path = os.path.join("output", file_name)
    fieldnames = ["Parameter", "Value"]
    with open(file_path, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(
            {"Parameter": key, "Value": value} for key, value in data.items()
        )
    return file_path


def process_contract(file_path):
    """Run the extraction, summary and translation steps on one PDF.

    Returns a ``(results, output_file)`` tuple: ``results`` maps each
    parameter name (plus the two summary entries) to its value, and
    ``output_file`` is the CSV file name passed to `save_to_csv`.
    """
    full_context = load_contract(file_path)

    # One QA query per parameter.
    results = {
        param: extract_parameters(full_context, f"What is the {param}?")
        for param in KEY_PARAMETERS
    }

    summary = summarize_contract(full_context)
    results["Contract Summary"] = summary

    # Only the summary is translated, not the full contract.
    results["Contract Summary (French)"] = translate_contract(summary)

    base_name = os.path.splitext(os.path.basename(file_path))[0]
    output_file = f"results_{base_name}.csv"
    save_to_csv(output_file, results)
    return results, output_file


def interface(file):
    """Gradio callback: process the uploaded file and format the results.

    Returns ``(display_text, output_file)``; when no file was provided the
    pair is ``("No file provided", None)``.
    """
    if not file:
        return "No file provided", None
    # The upload may arrive as an object exposing `.name` or as a plain path
    # string depending on the Gradio version/configuration; accept both.
    file_path = getattr(file, "name", file)
    results, output_file = process_contract(file_path)
    display_results = "\n".join(f"{key}: {value}" for key, value in results.items())
    return display_results, output_file


# Create Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# Contract Intelligence")
    gr.Markdown(
        "Upload a contract PDF to extract key parameters, generate a summary, and translate the summary into French."
    )
    with gr.Row():
        file_input = gr.File(label="Upload Contract PDF", file_types=[".pdf"])
    result_output = gr.Textbox(label="Extracted Information")
    download_link = gr.Textbox(label="Download CSV File Path")
    process_button = gr.Button("Process Contract")
    process_button.click(
        interface, inputs=file_input, outputs=[result_output, download_link]
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=8080)