# OCR-endpoint / app.py
# elvin.v.mammadov
# changes
# af20e94
import gradio as gr
import requests
import json
from jiwer import cer, wer
import re
# Path of the sample PDF that gets uploaded to the endpoint under test.
pdf_file_path = 'dummy.pdf'

# Ground-truth page transcriptions, parsed once at start-up; endpoint
# responses are scored against these entries.
with open("page_transcriptions.json", encoding="utf-8") as f:
    data = json.loads(f.read())
def _error_payload(message, response):
    """Return the error dictionary shown in the UI for a rejected response."""
    return {"Error message": message, "Response": response}


def _validate_pages(response_json):
    """Return an error message if ``response_json`` is malformed, else ``None``.

    A well-formed response is a list of dictionaries, each holding an integer
    ``page_number`` and a string ``MD_text``, with exactly as many entries as
    the ground truth in ``data``.
    """
    if not isinstance(response_json, list):
        return "Response should be list of dictionaries."
    for page in response_json:
        if not isinstance(page, dict):
            return "List should include only dictionaries."
        if "page_number" not in page or "MD_text" not in page:
            return "Response is not in desired structure. Desired structure: [{'page_number': 1, 'MD_text': 'Extracted text'}]"
        if not (isinstance(page["page_number"], int) and isinstance(page["MD_text"], str)):
            return "'page_number' should be integer and 'MD_text' should be string."
    if len(response_json) != len(data):
        return "The number of pages are not equal between transcription and ground truth."
    return None


def _normalize(text):
    """Strip, lower-case and collapse every whitespace run to one space."""
    # r'\s+' already covers newlines, so no separate "\n" replacement is needed.
    return re.sub(r'\s+', ' ', text.strip().lower())


def _success_rate(error_rate):
    """Turn an error rate into a success rate, clamped so it is never negative."""
    return max(1 - error_rate, 0)


def send_request(url):
    """Upload the sample PDF to ``url`` and score the returned transcription.

    The PDF at ``pdf_file_path`` is POSTed as multipart field ``file``. The
    endpoint is expected to answer with JSON of the form
    ``[{'page_number': 1, 'MD_text': 'Extracted text'}, ...]``. Every returned
    page is matched by ``page_number`` against the ground truth in ``data`` and
    scored with character and word success rates (``1 - CER`` / ``1 - WER``,
    clamped at 0) computed on normalized text.

    Args:
        url: Address of the OCR endpoint to test.

    Returns:
        On success, a list with one metrics dictionary per matched page
        followed by one dictionary holding the global metrics. On failure, a
        dictionary with an ``"Error message"`` key and, when a response was
        received, a ``"Response"`` key. All values are JSON-serializable.
    """
    # This is the UI boundary: any failure (missing PDF, bad URL, network
    # error) is reported to the user instead of raised.
    try:
        with open(pdf_file_path, 'rb') as pdf_file:
            files = {'file': ('dummy.pdf', pdf_file, 'application/pdf')}
            response = requests.post(url, files=files)
    except Exception as e:
        # Must be a dict (key/value), not a set holding one concatenated string.
        return {"Error message": f"Error occurred while sending request. Error message: {e}"}

    try:
        response_json = response.json()
    except Exception as e:
        # Exception objects and raw bytes are not JSON values; report text.
        return _error_payload(str(e), response.text)

    problem = _validate_pages(response_json)
    if problem is not None:
        return _error_payload(problem, response_json)

    final_metrics = []
    references = []
    hypotheses = []
    for page in response_json:
        for transcription in data:
            if page["page_number"] != transcription["page_number"]:
                continue
            reference = _normalize(transcription['MD_text'])
            hypothesis = _normalize(page['MD_text'])
            references.append(reference)
            hypotheses.append(hypothesis)
            final_metrics.append({
                "page_number": page["page_number"],
                "Character Success Rate (CSR)": round(_success_rate(cer(reference, hypothesis)), 4),
                "Word Success Rate (WSR)": round(_success_rate(wer(reference, hypothesis)), 4),
                "MD_text_used_for_metrics": hypothesis,
                "Ground_Truth_used_for_metrics": reference,
            })

    # Global metrics are computed over all matched pages joined by spaces.
    total_reference = " ".join(references).strip()
    total_hypothesis = " ".join(hypotheses).strip()
    final_metrics.append({
        "Global CSR": _success_rate(cer(total_reference, total_hypothesis)),
        "Global WSR": _success_rate(wer(total_reference, total_hypothesis)),
        "MD_text_used_for_metrics": total_hypothesis,
        "Ground_Truth_used_for_metrics": total_reference,
    })
    return final_metrics
with gr.Blocks() as demo:
    # Result panel, rendered above the input controls.
    output = gr.JSON(label="Output")

    # Single-line text field followed by its submit button.
    input_box = gr.Textbox(label="Input", lines=1, placeholder="Type your text here...")
    send_btn = gr.Button("Send")

    # Clicking the button and pressing Enter in the text field both run the
    # same handler with the same wiring.
    for register in (send_btn.click, input_box.submit):
        register(fn=send_request, inputs=input_box, outputs=output)

demo.launch()