Spaces:
Runtime error
Runtime error
| import os | |
| import cv2 | |
| import json | |
| import easyocr | |
| import datasets | |
| import socket | |
| import requests | |
| import keras_ocr | |
| import numpy as np | |
| import gradio as gr | |
| import pandas as pd | |
| import tensorflow as tf | |
| import re as r | |
| from PIL import Image | |
| from datasets import Image | |
| from datetime import datetime | |
| from paddleocr import PaddleOCR | |
| from urllib.request import urlopen | |
| from huggingface_hub import Repository, upload_file | |
| """ | |
| Paddle OCR | |
| """ | |
def ocr_with_paddle(img):
    """Run PaddleOCR on ``img`` and return the recognised text.

    Args:
        img: Image input, forwarded unchanged to ``PaddleOCR.ocr``.

    Returns:
        The recognised text fragments concatenated in result order, each one
        preceded by a single space, or an empty string when nothing was
        recognised.
    """
    ocr = PaddleOCR(lang='en', use_angle_cls=True)
    result = ocr.ocr(img)

    # Only the first page of the result is used. Guard against an empty
    # result and against a page reported as None (NOTE(review): PaddleOCR
    # appears to do this for images without detectable text - confirm
    # against the installed version); indexing it blindly raised before.
    page = result[0] if result else None
    if not page:
        return ''

    # The text of each entry is read from entry[1][0].
    return ''.join(' ' + entry[1][0] for entry in page)
| """ | |
| Keras OCR | |
| """ | |
def ocr_with_keras(img):
    """Run the keras-ocr pipeline on ``img`` and return the recognised text.

    Args:
        img: Image input, forwarded unchanged to ``keras_ocr.tools.read``.

    Returns:
        The recognised words of the first (only) image in prediction order,
        each one preceded by a single space.
    """
    recognizer = keras_ocr.pipeline.Pipeline()
    batch = [keras_ocr.tools.read(img)]
    # A single image is submitted, so only the first prediction group matters.
    detections = recognizer.recognize(batch)[0]
    # Every detection is a (text, box) pair; the box is not needed here.
    return ''.join(' ' + word for word, _box in detections)
| """ | |
| easy OCR | |
| """ | |
# Grayscale conversion
def get_grayscale(image):
    """Return ``image`` converted with OpenCV's ``COLOR_BGR2GRAY`` code."""
    conversion_code = cv2.COLOR_BGR2GRAY
    return cv2.cvtColor(image, conversion_code)
# Thresholding / binarization
def thresholding(src):
    """Apply ``cv2.threshold`` to ``src`` and return the resulting image.

    The call uses a threshold of 127, a max value of 255 and the
    ``cv2.THRESH_TOZERO`` mode; only the image part of OpenCV's
    ``(retval, image)`` result is returned.
    """
    _retval, thresholded = cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)
    return thresholded
def ocr_with_easy(img):
    """Run EasyOCR (Thai + English) on ``img`` and return the recognised text.

    The image is converted to grayscale and thresholded before recognition.

    Args:
        img: Image array accepted by ``get_grayscale``
            (NOTE(review): presumably the array supplied by the Gradio image
            component - confirm the channel order expected by the helper).

    Returns:
        The recognised text fragments joined with single spaces.
    """
    gray_scale_image = get_grayscale(img)
    # The thresholded image has to be captured: cv2.threshold returns a new
    # array, so calling the helper and dropping its result had no effect.
    prepared_image = thresholding(gray_scale_image)

    reader = easyocr.Reader(['th', 'en'])
    # The array is handed to EasyOCR directly instead of being written to a
    # fixed 'image.png' in the working directory, which concurrent requests
    # would overwrite for each other.
    # paragraph must be the boolean False: the string "False" is truthy and
    # silently enabled paragraph mode.
    fragments = reader.readtext(prepared_image, paragraph=False, detail=0)
    # Separate fragments with a space so neighbouring words are not glued
    # together.
    return ' '.join(fragments)
| """ | |
| Generate OCR | |
| """ | |
def generate_ocr(Method, input_image):
    """Run the selected OCR engine on the uploaded image.

    Args:
        Method: Engine name; one of ``'EasyOCR'``, ``'KerasOCR'`` or
            ``'PaddleOCR'``. Any other value yields an empty result.
        input_image: Uploaded image array, or ``None`` when nothing was
            uploaded.

    Returns:
        The text recognised by the selected engine.

    Raises:
        gr.Error: If no image was supplied or the image has no non-zero
            values.
    """
    # Check for None first: calling .any() on a missing upload raised
    # AttributeError instead of showing the intended message.
    if input_image is None or not input_image.any():
        raise gr.Error("Please upload an image!!!!")

    print("Method___________________", Method)
    engines = {
        'EasyOCR': ocr_with_easy,
        'KerasOCR': ocr_with_keras,
        'PaddleOCR': ocr_with_paddle,
    }
    engine = engines.get(Method)
    text_output = engine(input_image) if engine is not None else ''

    # ip_address / location are not defined in the visible part of this
    # module; read them from module scope when present and fall back to None
    # so the logging call no longer fails with NameError.
    module_scope = globals()
    client_ip = module_scope.get('ip_address')
    client_location = module_scope.get('location')

    # Logging is best-effort: a failure here must not discard the OCR result.
    try:
        flag(Method, input_image, text_output, client_ip, client_location)
    except Exception as exc:
        print("Could not log OCR request -->" + str(exc))
    return text_output
# ---------------------------------------------------------------------------
# Dataset logging configuration
# ---------------------------------------------------------------------------
# The access token is read from the HF_TOKEN environment variable only.
# NOTE(review): an earlier, dead lookup used what looks like a literal access
# token as the variable name; if that was a real token it should be revoked.
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_NAME = "ocr-image-to-text"
DATASET_REPO_URL = f"https://huggingface.co/datasets/Mo41/{DATASET_NAME}"
DATASET_REPO_ID = "Mo41/ocr-image-to-text"
print("is none?", HF_TOKEN is None)
REPOSITORY_DIR = "data"
LOCAL_DIR = 'data_local'
os.makedirs(LOCAL_DIR, exist_ok=True)

# The local clone is only used to pull after uploads, so a failure to set it
# up is logged instead of aborting start-up.
try:
    repo = Repository(
        local_dir="ocr_data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
    )
    repo.git_pull()
except Exception as exc:
    print("Could not prepare local dataset clone -->" + str(exc))
    repo = None


# ---------------------------------------------------------------------------
# Save generated details
# ---------------------------------------------------------------------------
def dump_json(thing, file):
    """Write ``thing`` as JSON to the path ``file`` (UTF-8, overwriting)."""
    with open(file, 'w', encoding="utf8") as f:
        json.dump(thing, f)


def flag(Method, input_image, text_output, ip_address, location):
    """Store one OCR request locally and upload it to the dataset repository.

    The image and a ``metadata.jsonl`` file are written to a timestamped
    folder below ``LOCAL_DIR`` and uploaded to ``DATASET_REPO_ID`` under
    ``REPOSITORY_DIR/<timestamp>/``.

    Args:
        Method: Name of the OCR engine that produced ``text_output``.
        input_image: Image array passed to ``PIL.Image.fromarray``.
        text_output: Text recognised for the image.
        ip_address: Stored as-is in the metadata.
        location: Stored as-is in the metadata (key ``loc``).

    Returns:
        A status string. Errors are reported through the returned string
        rather than raised.
    """
    try:
        # datasets.Image is imported after PIL's Image at module level and
        # shadows it, so PIL's module is imported here under its own name.
        from PIL import Image as PILImage

        print("saving data------------------------")
        metadata_name = datetime.now().strftime('%Y-%m-%d %H-%M-%S')
        SAVE_FILE_DIR = os.path.join(LOCAL_DIR, metadata_name)
        os.makedirs(SAVE_FILE_DIR, exist_ok=True)

        image_output_filename = os.path.join(SAVE_FILE_DIR, 'image.png')
        try:
            PILImage.fromarray(input_image).save(image_output_filename)
        except Exception as err:
            # Keep the original cause attached for debugging.
            raise Exception("Had issues saving PIL image to file") from err

        # Write the metadata file next to the image.
        json_file_path = os.path.join(SAVE_FILE_DIR, 'metadata.jsonl')
        metadata = {
            'id': metadata_name,
            'method': Method,
            'File_name': 'image.png',
            'generated_text': text_output,
            'ip_address': ip_address,
            'loc': location,
        }
        dump_json(metadata, json_file_path)

        # Upload the image and the metadata using the hub's upload_file.
        # Repository paths always use forward slashes, independent of the
        # local OS path separator.
        uploads = (
            (image_output_filename, 'image.png'),
            (json_file_path, 'metadata.jsonl'),
        )
        for local_path, file_name in uploads:
            upload_file(
                path_or_fileobj=local_path,
                path_in_repo=f"{REPOSITORY_DIR}/{metadata_name}/{file_name}",
                repo_id=DATASET_REPO_ID,
                repo_type='dataset',
                token=HF_TOKEN,
            )

        # Refresh the local clone when one is available.
        if repo is not None:
            repo.git_pull()
        return "*****Logs save successfully!!!!"
    except Exception as e:
        return "Error whils saving logs -->" + str(e)


# ---------------------------------------------------------------------------
# User interface
# ---------------------------------------------------------------------------
image = gr.Image(shape=(300, 300))
method = gr.Radio(["PaddleOCR", "EasyOCR", "KerasOCR"], value="PaddleOCR", elem_id="radio_div")
output = gr.Textbox(label="Output", elem_id="opbox")
demo = gr.Interface(
    generate_ocr,
    [method, image],
    output,
    title="Optical Character Recognition",
    css=".gradio-container {background-color: #C0E1F2} #radio_div {background-color: #ADA5EC; font-size: 40px;} #btn {background-color: #94D68B; font-size: 20px;} #opbox {background-color: #ADA5EC;}",
    article="""<p style='text-align: center;'>Feel free to give us your <a href="https://www.pragnakalp.com/contact/" target="_blank">feedback</a> and contact us at
<a href="mailto:letstalk@pragnakalp.com" target="_blank">letstalk@pragnakalp.com</a> And don't forget to check out more interesting
<a href="https://www.pragnakalp.com/services/natural-language-processing-services/" target="_blank">NLP services</a> we are offering.</p>
<p style='text-align: center;'>Developed by :<a href="https://www.pragnakalp.com" target="_blank"> Pragnakalp Techlabs</a></p>"""
)

# Launch last: everything the request handler relies on (token, repository
# clone, flag) must already be defined when the server starts serving.
demo.launch()