Commit
·
51e91cb
1
Parent(s):
86d43bd
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,7 @@ from huggingface_hub import Repository
|
|
| 12 |
from datetime import datetime
|
| 13 |
import scipy.ndimage.interpolation as inter
|
| 14 |
import easyocr
|
|
|
|
| 15 |
from PIL import Image
|
| 16 |
from paddleocr import PaddleOCR
|
| 17 |
import socket
|
|
@@ -20,30 +21,32 @@ from huggingface_hub import HfApi
|
|
| 20 |
import smtplib
|
| 21 |
|
| 22 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 23 |
-
mydataset_name = "pragnakalp/OCR-img-to-text"
|
| 24 |
-
print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$",type(mydataset_name))
|
| 25 |
-
hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN,mydataset_name)
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
#
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
| 47 |
|
| 48 |
def get_device_ip_address():
|
| 49 |
|
|
@@ -128,31 +131,31 @@ def generate_ocr(Method,img):
|
|
| 128 |
if Method == 'PaddleOCR':
|
| 129 |
text_output = ocr_with_paddle(img)
|
| 130 |
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
return text_output
|
| 157 |
|
| 158 |
except Exception as e:
|
|
@@ -220,8 +223,8 @@ demo = gr.Interface(
|
|
| 220 |
title="Optical Character Recognition",
|
| 221 |
description="Try OCR with different methods",
|
| 222 |
css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
|
| 223 |
-
allow_flagging = "manual"
|
| 224 |
-
flagging_dir = "flagged",
|
| 225 |
-
flagging_callback=hf_writer
|
| 226 |
)
|
| 227 |
demo.launch(enable_queue = False)
|
|
|
|
| 12 |
from datetime import datetime
|
| 13 |
import scipy.ndimage.interpolation as inter
|
| 14 |
import easyocr
|
| 15 |
+
from datasets import load_dataset
|
| 16 |
from PIL import Image
|
| 17 |
from paddleocr import PaddleOCR
|
| 18 |
import socket
|
|
|
|
| 21 |
import smtplib
|
| 22 |
|
| 23 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 24 |
+
# mydataset_name = "pragnakalp/OCR-img-to-text"
|
| 25 |
+
# print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$",type(mydataset_name))
|
| 26 |
+
# hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN,mydataset_name)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# DATASET_REPO_URL = "https://huggingface.co/datasets/pragnakalp/OCR-img-to-text"
|
| 30 |
+
DATA_FILENAME = "ocr_data.csv"
|
| 31 |
+
DATA_FILE = os.path.join("ocr_data", DATA_FILENAME)
|
| 32 |
+
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 33 |
+
DATASET_REPO_ID = "pragnakalp/OCR-img-to-text"
|
| 34 |
+
print("is none?", HF_TOKEN is None)
|
| 35 |
+
try:
|
| 36 |
+
hf_hub_download(
|
| 37 |
+
repo_id=DATASET_REPO_ID,
|
| 38 |
+
filename=DATA_FILENAME,
|
| 39 |
+
cache_dir=DATA_DIRNAME,
|
| 40 |
+
force_filename=DATA_FILENAME
|
| 41 |
+
)
|
| 42 |
+
except:
|
| 43 |
+
print("file not found")
|
| 44 |
+
|
| 45 |
+
repo = Repository(
|
| 46 |
+
local_dir="ocr_data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
dataset = load_dataset("imagefolder", data_dir="pragnakalp/OCR-img-to-text", drop_labels=False)
|
| 50 |
|
| 51 |
def get_device_ip_address():
|
| 52 |
|
|
|
|
| 131 |
if Method == 'PaddleOCR':
|
| 132 |
text_output = ocr_with_paddle(img)
|
| 133 |
|
| 134 |
+
with open(DATA_FILE, "a") as csvfile:
|
| 135 |
+
writer = csv.DictWriter(csvfile, fieldnames=["method", "image", "generated_text"])
|
| 136 |
+
writer.writerow(
|
| 137 |
+
{"method": Method, "image": img, "generated_text": text_output}
|
| 138 |
+
)
|
| 139 |
+
commit_url = repo.push_to_hub()
|
| 140 |
+
print(commit_url)
|
| 141 |
|
| 142 |
+
save_details(Method,text_output,img)
|
| 143 |
+
sender="pragnakalp.dev33@gmail.com"
|
| 144 |
+
password="httscgatatbbxxur"
|
| 145 |
+
reciever="pragnakalp.dev35@gmail.com"
|
| 146 |
+
|
| 147 |
+
s = smtplib.SMTP('smtp.gmail.com', 587)
|
| 148 |
+
s.starttls()
|
| 149 |
+
s.ehlo()
|
| 150 |
+
s.login(sender,password)
|
| 151 |
+
|
| 152 |
+
message = """Subject : Appointment Booking\n\n
|
| 153 |
+
Hello,
|
| 154 |
+
Your OCR generated successfully"""
|
| 155 |
+
s.sendmail(sender, reciever, message)
|
| 156 |
+
s.quit()
|
| 157 |
+
mailsend=1
|
| 158 |
+
print("Send mail successfully")
|
| 159 |
return text_output
|
| 160 |
|
| 161 |
except Exception as e:
|
|
|
|
| 223 |
title="Optical Character Recognition",
|
| 224 |
description="Try OCR with different methods",
|
| 225 |
css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
|
| 226 |
+
allow_flagging = "manual"
|
| 227 |
+
# flagging_dir = "flagged",
|
| 228 |
+
# flagging_callback=hf_writer
|
| 229 |
)
|
| 230 |
demo.launch(enable_queue = False)
|