import gradio as gr
import requests
import json
from jiwer import cer, wer
import re

# Ground-truth page transcriptions, read once at import time; responses are
# scored against these entries.
with open("page_transcriptions.json", encoding="utf-8") as f:
    data = json.loads(f.read())

# PDF that is uploaded to the URL entered in the UI.
pdf_file_path = 'dummy.pdf'

def _error(message, response):
    """Build the error payload returned to the UI for an invalid response."""
    return {"Error message": message, "Response": response}


def _validate_pages(pages):
    """Return a description of the first structural problem in *pages*, or None."""
    for page in pages:
        if not isinstance(page, dict):
            return "List should include only dictionaries."
        if "page_number" not in page or "MD_text" not in page:
            return "Response is not in desired structure. Desired structure: [{'page_number': 1, 'MD_text': 'Extracted text'}]"
        if not (isinstance(page["page_number"], int) and isinstance(page["MD_text"], str)):
            return "'page_number' should be integer and 'MD_text' should be string."
    if len(pages) != len(data):
        return "The number of pages are not equal between transcription and ground truth."
    return None


def _normalize(text):
    """Trim *text*, lower-case it and collapse each whitespace run to one space."""
    # r'\s+' already covers newlines, so no separate newline replacement is needed.
    return re.sub(r'\s+', ' ', text.strip().lower())


def _success_rates(reference, hypothesis):
    """Return (character, word) success rates, i.e. 1 - error rate floored at 0."""
    if not reference:
        # An error rate against an empty reference is undefined (jiwer refuses
        # to compute it), so score an exact match as 1 and anything else as 0.
        score = 1.0 if not hypothesis else 0.0
        return score, score
    return (
        max(1 - cer(reference, hypothesis), 0),
        max(1 - wer(reference, hypothesis), 0),
    )


def send_request(url):
    """Upload the sample PDF to *url* and score the returned transcription.

    The endpoint is expected to answer with JSON shaped like
    ``[{'page_number': 1, 'MD_text': 'Extracted text'}, ...]``. Each page is
    compared with the ground-truth entry that has the same ``page_number``.

    Args:
        url: Address the PDF is POSTed to as multipart form field ``file``.

    Returns:
        On success, a list with one metrics dict per matched page followed by
        a final dict holding the global scores over all pages joined together.
        On any failure, a dict with an ``"Error message"`` entry and, when a
        response was received, a ``"Response"`` entry. All values are
        JSON-serializable so the result can be rendered by the JSON output.
    """
    try:
        with open(pdf_file_path, 'rb') as pdf_file:
            files = {'file': ('dummy.pdf', pdf_file, 'application/pdf')}
            response = requests.post(url, files=files)
    except Exception as e:
        # Deliberately broad: this is the UI boundary and every failure
        # (missing file, bad URL, connection error) is reported to the user.
        return {"Error message": f"Error occurred while sending request. Error message: {e}"}

    try:
        response_json = response.json()
    except ValueError as e:
        # Return text rather than the exception object / raw bytes, which
        # cannot be serialized as JSON.
        return {"Error message": str(e), "Response": response.text}

    if not isinstance(response_json, list):
        return _error("Response should be list of dictionaries.", response_json)

    problem = _validate_pages(response_json)
    if problem is not None:
        return _error(problem, response_json)

    # Index the ground truth by page number once instead of rescanning it for
    # every response page. A list per key keeps every entry sharing a number.
    references_by_page = {}
    for transcription in data:
        references_by_page.setdefault(transcription["page_number"], []).append(transcription["MD_text"])

    final_metrics = []
    all_references = []
    all_hypotheses = []

    for page in response_json:
        page_number = page["page_number"]
        if page_number not in references_by_page:
            # Skipping the page silently would yield metrics over only part
            # of the document.
            return _error(f"Page number {page_number} was not found in the ground truth.", response_json)

        hypothesis = _normalize(page["MD_text"])
        for reference_text in references_by_page[page_number]:
            reference = _normalize(reference_text)
            all_references.append(reference)
            all_hypotheses.append(hypothesis)

            csr, wsr = _success_rates(reference, hypothesis)
            final_metrics.append({
                "page_number": page_number,
                "Character Success Rate (CSR)": round(csr, 4),
                "Word Success Rate (WSR)": round(wsr, 4),
                "MD_text_used_for_metrics": hypothesis,
                "Ground_Truth_used_for_metrics": reference,
            })

    total_reference = " ".join(all_references).strip()
    total_hypothesis = " ".join(all_hypotheses).strip()
    global_csr, global_wsr = _success_rates(total_reference, total_hypothesis)

    final_metrics.append({
        "Global CSR": global_csr,
        "Global WSR": global_wsr,
        "MD_text_used_for_metrics": total_hypothesis,
        "Ground_Truth_used_for_metrics": total_reference,
    })

    return final_metrics

with gr.Blocks() as demo:

    # Components are created in display order: result viewer, input, button.
    output = gr.JSON(label="Output")
    input_box = gr.Textbox(label="Input", lines=1, placeholder="Type your text here...")
    send_btn = gr.Button("Send")

    # Clicking the button and pressing Enter in the textbox run the same handler.
    for register in (send_btn.click, input_box.submit):
        register(fn=send_request, inputs=input_box, outputs=output)

demo.launch()