| import gradio as gr |
| import requests |
| import tensorflow as tf |
| import keras_ocr |
| import cv2 |
| import os |
| import csv |
| import numpy as np |
| import pandas as pd |
| import huggingface_hub |
| from huggingface_hub import Repository |
| from datetime import datetime |
| import scipy.ndimage.interpolation as inter |
| import easyocr |
| from datasets import load_dataset |
| from PIL import Image |
| from paddleocr import PaddleOCR |
| import socket |
| |
| from huggingface_hub import HfApi |
| import smtplib |
|
|
# Hugging Face access token; None when the HF_TOKEN environment variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Local working copy of the dataset repository and the CSV log kept inside it.
DATA_DIRNAME = "ocr_data"
DATA_FILENAME = "ocr_data.csv"
DATA_FILE = os.path.join(DATA_DIRNAME, DATA_FILENAME)

DATASET_REPO_ID = "pragnakalp/OCR-img-to-text"
# NOTE(review): assumes DATASET_REPO_ID names a *dataset* repo on the Hub - confirm.
DATASET_REPO_URL = "https://huggingface.co/datasets/" + DATASET_REPO_ID
print("is none?", HF_TOKEN is None)

# No separate download of DATA_FILENAME happens here: the clone below fetches
# the whole repository, and pre-populating DATA_DIRNAME with a downloaded file
# would leave a non-empty, non-git directory that Repository cannot clone into.
repo = Repository(
    local_dir=DATA_DIRNAME, clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
)

# NOTE(review): `data_dir` is given the Hub repo id, not an obviously local
# folder, and `dataset` is not referenced again in the visible code - confirm
# this call is still needed and that the argument is what was intended.
dataset = load_dataset("imagefolder", data_dir="pragnakalp/OCR-img-to-text", drop_labels=False)
|
|
def _posix_primary_ip(hostname):
    """Best-effort lookup of the local IPv4 address used to reach the default gateway.

    Falls back to resolving *hostname*, and finally to the string "unknown",
    when no default route can be read or the probe socket cannot be set up.
    """
    with os.popen("ip -4 route show default") as pipe:
        fields = pipe.read().split()

    # Expected shape: "default via <gateway> dev <iface> ..." -> gateway is field 2.
    if len(fields) >= 3:
        try:
            # Connecting a UDP socket sends no packet; it only makes the OS
            # pick the local address it would use for that destination.
            with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
                probe.connect((fields[2], 0))
                return probe.getsockname()[0]
        except OSError:
            # Unusable gateway field or no route: fall through to the fallback.
            pass

    try:
        return socket.gethostbyname(hostname)
    except OSError:
        return "unknown"


def get_device_ip_address():
    """Describe this machine's host name and IP address as display text.

    Returns:
        A multi-line string. On Windows it lists the host name and the address
        the host name resolves to; on POSIX it lists the local IPv4 address
        and the host name; on any other platform it says the platform is not
        supported.
    """
    if os.name == "nt":
        hostname = socket.gethostname()
        host_ip = socket.gethostbyname(hostname)
        return (
            "Running on Windows"
            + "\nHostname: " + hostname
            + "\nHost-IP-Address:" + host_ip
        )

    if os.name == "posix":
        host = socket.gethostname()
        ipaddr = _posix_primary_ip(host)
        return "\nIP address:\t\t" + ipaddr + "\r\nHost:\t\t" + host

    return os.name + " not supported yet."
|
|
| |
| """ |
| Paddle OCR |
| """ |
def ocr_with_paddle(img):
    """Run PaddleOCR (English, with angle classification) on *img*.

    Args:
        img: Image to read, passed straight through to ``PaddleOCR.ocr``.

    Returns:
        The recognised text fragments, each prefixed with a single space and
        concatenated in detection order; ``''`` when nothing is detected.
    """
    ocr = PaddleOCR(lang='en', use_angle_cls=True)
    result = ocr.ocr(img)

    # "No text found" can come back as an empty list or as [None]; both would
    # otherwise crash on result[0] / len(None).
    if not result or not result[0]:
        return ''

    # Each detection is indexed as detection[1][0] for its text, exactly as
    # the original loop did.
    return ''.join(' ' + detection[1][0] for detection in result[0])
|
|
| """ |
| Keras OCR |
| """ |
def ocr_with_keras(img):
    """Run the keras-ocr pipeline on *img* and return the recognised words.

    Args:
        img: Image handed to ``keras_ocr.tools.read``.

    Returns:
        Every recognised word prefixed with a single space, concatenated in
        the order the pipeline reported them.
    """
    recognizer = keras_ocr.pipeline.Pipeline()
    batch = [keras_ocr.tools.read(img)]
    # Only one image is submitted, so only the first prediction list matters;
    # its entries unpack into (word, box) pairs.
    words_and_boxes = recognizer.recognize(batch)[0]
    return ''.join(' ' + word for word, _box in words_and_boxes)
|
|
| """ |
| easy OCR |
| """ |
| |
def get_grayscale(image):
    """Return *image* converted with OpenCV's ``COLOR_BGR2GRAY`` conversion."""
    # NOTE(review): the conversion code assumes BGR channel order - confirm
    # that callers do not hand in RGB arrays.
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return grayscale
|
|
| |
def thresholding(src):
    """Apply ``cv2.THRESH_TOZERO`` at level 127 to *src* and return the new image.

    ``cv2.threshold`` returns a pair; only its second element (the processed
    image) is handed back. *src* itself is not returned.
    """
    _retval, thresholded = cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)
    return thresholded
def ocr_with_easy(img):
    """Run EasyOCR (language codes ``th`` and ``en``) on *img*.

    Args:
        img: Colour image array; it is converted to grayscale before OCR.

    Returns:
        The recognised text pieces concatenated without a separator.
    """
    gray_scale_image = get_grayscale(img)

    # NOTE(review): the previous code also called thresholding() here but threw
    # the result away, so the OCR input was always the plain grayscale image.
    # That effective behaviour is kept; decide separately whether thresholding
    # should really be applied.

    reader = easyocr.Reader(['th', 'en'])

    # The array is passed directly instead of going through a fixed
    # 'image.png' on disk, which concurrent requests would overwrite.
    #
    # paragraph was previously the string "False", which is truthy, so
    # paragraph grouping was in fact enabled; True states that explicitly.
    fragments = reader.readtext(gray_scale_image, paragraph=True, detail=0)
    return ''.join(fragments)
| """ |
| Generate OCR |
| """ |
def _run_ocr(Method, img):
    """Send *img* to the OCR backend named by *Method*; unknown names yield ''."""
    if Method == 'EasyOCR':
        return ocr_with_easy(img)
    if Method == 'KerasOCR':
        return ocr_with_keras(img)
    if Method == 'PaddleOCR':
        return ocr_with_paddle(img)
    return ''


def _record_result(Method, img, text_output):
    """Append one row to the CSV log and push the dataset repository."""
    # newline="" is what the csv module expects; utf-8 keeps non-ASCII OCR
    # output writable regardless of the platform's default encoding.
    with open(DATA_FILE, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["method", "image", "generated_text"])
        writer.writerow(
            {"method": Method, "image": img, "generated_text": text_output}
        )
    commit_url = repo.push_to_hub()
    print(commit_url)


def _send_notification_email():
    """Send the fixed notification mail through Gmail SMTP, if a password is configured."""
    sender = os.environ.get("OCR_MAIL_SENDER", "pragnakalp.dev33@gmail.com")
    reciever = os.environ.get("OCR_MAIL_RECEIVER", "pragnakalp.dev35@gmail.com")
    # The password must come from the environment; it is deliberately not
    # stored in the source file.
    password = os.environ.get("OCR_MAIL_PASSWORD")
    if not password:
        print("OCR_MAIL_PASSWORD is not set; skipping notification mail")
        return

    message = (
        "Subject : Appointment Booking\n\n\n"
        "Hello,\n"
        "Your OCR generated successfully"
    )
    # The context manager closes the connection even when login/sendmail fail;
    # the timeout keeps a stuck mail server from blocking the request forever.
    with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as s:
        s.starttls()
        s.ehlo()
        s.login(sender, password)
        s.sendmail(sender, reciever, message)
    print("Send mail successfully")


def generate_ocr(Method, img):
    """Run the selected OCR backend on *img* and return the recognised text.

    After a successful OCR run the result is logged to the CSV file and pushed
    to the Hub, ``save_details`` is called, and a notification mail is sent.
    Those follow-up steps are best-effort: a failure in any of them is printed
    and does not replace the OCR text that was already produced.

    Environment:
        OCR_MAIL_PASSWORD: SMTP password; the mail step is skipped when unset.
        OCR_MAIL_SENDER / OCR_MAIL_RECEIVER: optional address overrides.

    Args:
        Method: 'EasyOCR', 'KerasOCR' or 'PaddleOCR'. Any other value yields ''.
        img: Image supplied by the UI.

    Returns:
        The recognised text, or "Something went wrong" when the OCR backend
        itself raised.
    """
    print("Method___________________", Method)
    try:
        text_output = _run_ocr(Method, img)
    except Exception as e:
        print("Error in ocr generation ==>", e)
        return "Something went wrong"

    follow_ups = (
        ("saving the result", lambda: _record_result(Method, img, text_output)),
        ("saving details", lambda: save_details(Method, text_output, img)),
        ("sending mail", _send_notification_email),
    )
    for label, action in follow_ups:
        try:
            action()
        except Exception as e:
            # Bookkeeping must never hide a successful OCR result from the user.
            print("Error while " + label + " ==>", e)

    return text_output
| """ |
| Save generated details |
| """ |
def save_details(Method, text_output, img):
    """Look up the device address, then return the result of ``send_user_email()``.

    The three parameters are accepted but not used by the current body.

    NOTE(review): ``send_user_email`` is not defined in the visible part of
    this file - confirm it exists, otherwise this call raises NameError.
    """
    # The lookup result is not used; the call itself is kept as-is.
    get_device_ip_address()
    return send_user_email()
| |
|
|
| """ |
| Create user interface for OCR demo |
| """ |
|
|
# Input and output components of the demo page.
image = gr.Image(shape=(224, 224), elem_id="img_div")
method = gr.Radio(
    choices=["EasyOCR", "KerasOCR", "PaddleOCR"],
    value="PaddleOCR",
    elem_id="radio_div",
)
output = gr.Textbox(label="Output")

# Wire the components to generate_ocr: the selected method first, the image second.
demo = gr.Interface(
    fn=generate_ocr,
    inputs=[method, image],
    outputs=output,
    title="Optical Character Recognition",
    description="Try OCR with different methods",
    css=(
        ".gradio-container {background-color: lightgray} "
        "#radio_div {background-color: #FFD8B4; font-size: 40px;}"
    ),
    allow_flagging="manual",
)
demo.launch(enable_queue=False)