Spaces:
Runtime error
Runtime error
| import os | |
| import numpy as np | |
| import json | |
| import requests | |
| from urllib.request import urlopen | |
| from datetime import datetime | |
| import gradio as gr | |
| from paddleocr import PaddleOCR | |
| from PIL import Image as PILImage | |
| from huggingface_hub import Repository, upload_file | |
| import spaces | |
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Hub access token taken from the environment (None when the variable is unset).
HF_TOKEN = os.getenv("HF_TOKEN")

# Repository that receives the uploaded OCR logs.
DATASET_REPO_ID = "ImranzamanML/image_to_text_ocr"
DATASET_REPO_URL = f"https://huggingface.co/{DATASET_REPO_ID}"

# Folder inside the repository under which each request's files are stored.
REPOSITORY_DIR = "data"

# Local folder where each request's files are written before being uploaded.
LOCAL_DIR = "data_local"
os.makedirs(LOCAL_DIR, exist_ok=True)
| """ | |
| OCR using PaddleOCR | |
| """ | |
# Lazily-created PaddleOCR engine shared by all calls, so the models are
# loaded once instead of on every request.
_OCR_ENGINE = None


def _get_ocr_engine():
    """Return the shared PaddleOCR engine, creating it on first use."""
    global _OCR_ENGINE
    if _OCR_ENGINE is None:
        _OCR_ENGINE = PaddleOCR(use_gpu=True, lang='en', use_angle_cls=True)
    return _OCR_ENGINE


def process_image_with_paddleocr(image):
    """Run PaddleOCR on ``image`` and return the recognized text.

    Args:
        image: Input forwarded unchanged to ``PaddleOCR.ocr``.

    Returns:
        str: Every recognized fragment prefixed with a single space and
        concatenated in the order PaddleOCR reports them (a non-empty
        result therefore starts with a space). An empty string when
        nothing was recognized.
    """
    result = _get_ocr_engine().ocr(image)
    # A page without detections may come back as ``None`` (or an empty list)
    # rather than a list of lines; iterating it directly would raise.
    if not result or not result[0]:
        return ''
    # Each line is ``[box, (text, confidence)]``; only the text is kept.
    return ''.join(' ' + line[1][0] for line in result[0])
| """ | |
| Utility Functions | |
| """ | |
def save_json(data, filepath):
    """Serialize ``data`` as JSON into ``filepath``, replacing any old content.

    Args:
        data: JSON-serializable object.
        filepath: Destination path; the file is written as UTF-8.
    """
    with open(filepath, 'w+', encoding="utf8") as handle:
        handle.write(json.dumps(data))
def get_ip_address(url='http://checkip.dyndns.com/', timeout=5):
    """Return the IPv4 address reported by an IP-echo page.

    The request is made by this process, so the address returned is the
    public address of the machine running the app.

    Args:
        url: Page whose body contains ``Address: <a.b.c.d>``.
        timeout: Seconds to wait for the page before giving up.

    Returns:
        str: The dotted-quad address, or ``''`` when the page cannot be
        fetched or contains no address (the failure is printed, never raised).
    """
    # Local import: the module-level imports do not include ``re``, which
    # previously made this function fail on every call.
    import re

    try:
        with urlopen(url, timeout=timeout) as response:
            body = response.read().decode('utf-8', errors='replace')
    except Exception as e:
        # Best-effort lookup: any fetch failure degrades to "unknown".
        print("Error while getting IP address -->", e)
        return ''

    match = re.search(r'Address: (\d+\.\d+\.\d+\.\d+)', body)
    if match is None:
        print("Error while getting IP address -->", "no IP address found in response")
        return ''
    return match.group(1)
def fetch_location(ip_addr, timeout=10):
    """Look up location details for ``ip_addr`` via the remote lookup service.

    Args:
        ip_addr: IP address string sent to the service.
        timeout: Seconds to wait for the service; without it a stalled
            server would block the calling request indefinitely.

    Returns:
        The decoded JSON response, or ``{}`` when the request fails or the
        response is not valid JSON (the failure is printed, never raised).
    """
    req_data = {"ip": ip_addr, "token": "pkml123"}
    url = "https://demos.pragnakalp.com/get-ip-location"
    try:
        # ``json=`` serializes the payload and sets the JSON Content-Type header.
        response = requests.post(url, json=req_data, timeout=timeout)
        return response.json()
    except Exception as e:
        # Best-effort lookup: any failure degrades to "no location".
        print("Error while getting location -->", e)
        return {}
def log_ocr_data(text_output, input_image):
    """Record one OCR request locally, upload it to the dataset repo, and notify.

    The image and a metadata file are written to a timestamped folder under
    ``LOCAL_DIR``, uploaded to ``DATASET_REPO_ID`` under
    ``REPOSITORY_DIR/<timestamp>/``, and a summary is POSTed to the
    notification endpoint.

    Args:
        text_output: Recognized text to store with the image.
        input_image: Image array; saved via ``PIL.Image.fromarray`` and sent
            to the notification endpoint as nested lists (``tolist()``).

    Returns:
        str: A fixed success message.

    Raises:
        Exception: If the image cannot be written to disk (the underlying
            error is chained). Upload and notification errors propagate
            unchanged.
    """
    print("Logging OCR data...")
    ip_address = get_ip_address()
    location_info = fetch_location(ip_address)

    # The timestamp doubles as record id and folder name.
    timestamp = datetime.now().strftime('%Y-%m-%d %H-%M-%S')
    save_dir = os.path.join(LOCAL_DIR, timestamp)
    os.makedirs(save_dir, exist_ok=True)

    image_filename = os.path.join(save_dir, 'image.png')
    try:
        PILImage.fromarray(input_image).save(image_filename)
    except Exception as err:
        # Keep the original cause attached for debugging.
        raise Exception("Failed to save image as file") from err

    metadata_file_path = os.path.join(save_dir, 'metadata.jsonl')
    metadata = {
        'id': timestamp,
        'method': "PaddleOCR",
        'file_name': 'image.png',
        'generated_text': text_output,
        'ip': ip_address,
        'location': location_info,
    }
    save_json(metadata, metadata_file_path)

    uploads = (
        (image_filename, 'image.png'),
        (metadata_file_path, 'metadata.jsonl'),
    )
    for local_path, file_name in uploads:
        upload_file(
            path_or_fileobj=local_path,
            # Repo paths always use forward slashes, whatever the local OS.
            path_in_repo=f"{REPOSITORY_DIR}/{timestamp}/{file_name}",
            repo_id=DATASET_REPO_ID,
            repo_type='dataset',
            token=HF_TOKEN,
        )

    # The former ``repo.git_pull()`` call was dropped: ``repo`` is never
    # defined in this file, so it raised NameError and skipped everything
    # below. ``upload_file`` needs no local clone to keep in sync.

    url = 'http://pragnakalpdev35.pythonanywhere.com/HF_space_image_to_text'
    payload = {
        'Method': "PaddleOCR",
        'text_output': text_output,
        'img': input_image.tolist(),
        'ip_address': ip_address,
        'loc': location_info,
    }
    # Bounded wait so a stalled endpoint cannot hang the user's request.
    response = requests.post(url, json=payload, timeout=30)
    print("Mail status code:", response.status_code)
    return "***** Logs saved successfully! *****"
| """ | |
| OCR Generation | |
| """ | |
def generate_ocr_text(image):
    """Gradio handler: run OCR on the uploaded image and log the request.

    Args:
        image: Image array from the UI, or ``None`` when nothing was uploaded.

    Returns:
        str: The recognized text.

    Raises:
        gr.Error: If no image was provided, or the image contains only zeros.
    """
    # ``None`` must be checked first: calling ``.any()`` on it would raise
    # AttributeError instead of showing the intended message. An all-zero
    # array is still treated as "no image", as before.
    if image is None or not image.any():
        raise gr.Error("Please upload an image!")

    text_output = process_image_with_paddleocr(image)
    try:
        log_ocr_data(text_output, image)
    except Exception as e:
        # Logging is best-effort; a failure must not hide the OCR result.
        print(e)
    return text_output
| """ | |
| Create user interface for OCR demo | |
| """ | |
# Stylesheet applied to the demo page (container, buttons, and text boxes).
_DEMO_CSS = """
.gradio-container {
background-color: #f0f4f8;
font-family: 'Roboto', sans-serif;
padding: 20px;
border-radius: 10px;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
}
.gr-button {
background-color: #007bff;
color: white;
border-radius: 5px;
padding: 10px 20px;
font-size: 16px;
cursor: pointer;
}
.gr-button:hover {
background-color: #0056b3;
}
.gr-textbox {
background-color: #ffffff;
border: 1px solid #ced4da;
border-radius: 5px;
padding: 10px;
font-size: 16px;
}
.gr-textbox:focus {
border-color: #007bff;
box-shadow: 0 0 0 0.2rem rgba(0,123,255,.25);
}
"""

# Input widget: the uploaded image is handed to the handler as a numpy array.
image_input = gr.Image(type="numpy", label="Upload Image")

# Output widget: multi-line box that shows the recognized text.
output_textbox = gr.Textbox(
    label="Recognized Text",
    lines=5,
    placeholder="OCR results will appear here...",
)

# Wire the OCR handler to the widgets and start serving the app.
demo = gr.Interface(
    fn=generate_ocr_text,
    inputs=[image_input],
    outputs=output_textbox,
    title="Image to Text OCR",
    description="Upload an image and extract text. This tool supports multiple languages and handles complex layouts.",
    theme="default",
    css=_DEMO_CSS,
)
demo.launch()