# Spaces: Sleeping
| import cv2 | |
| import numpy as np | |
| import torch | |
| from PIL import Image | |
| import gradio as gr | |
| import re | |
| from ultralytics import YOLO | |
| import easyocr | |
| from transformers import TrOCRProcessor, VisionEncoderDecoderModel | |
| from scipy.signal import medfilt | |
# Load models (everything in this section runs once, at import time).
# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# YOLOv5 for digital meter detection (pre-trained weights).
yolo_model = YOLO("yolov5s.pt")

# OCR models: EasyOCR (English) plus the TrOCR processor/model pair.
ocr_reader = easyocr.Reader(["en"])
_TROCR_CHECKPOINT = "microsoft/trocr-base-stage1"
trocr_processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
trocr_model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
trocr_model = trocr_model.to(device)
# Image Preprocessing (Adaptive Threshold & Sharpening)
def enhance_image(image):
    """Return a sharpened, adaptively thresholded grayscale version of *image*.

    Args:
        image: NumPy image array. May be 2-D (already grayscale) or 3-D with
            3 (RGB) or 4 (RGBA) channels. Colour input is interpreted in RGB
            order, which is what the Gradio image input hands to the
            pipeline in this file.

    Returns:
        The single-channel image produced by ``cv2.adaptiveThreshold``.
    """
    # Reduce to one channel; cvtColor would raise on input that is already 2-D.
    if image.ndim == 2:
        gray = image
    elif image.shape[2] == 4:
        gray = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # 3x3 sharpening kernel: boost the centre pixel, subtract the 4-neighbours.
    sharpen_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    sharpened = cv2.filter2D(gray, -1, sharpen_kernel)

    # Gaussian adaptive threshold over an 11x11 neighbourhood, offset 2.
    return cv2.adaptiveThreshold(
        sharpened,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
# Convert Grayscale to RGB (Fix for TrOCR)
def convert_to_rgb(image):
    """Expand a 2-D grayscale array to three channels.

    Arrays that are not 2-D are returned unchanged.
    """
    is_grayscale = len(image.shape) == 2
    if not is_grayscale:
        return image
    return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
# Detect Digital Meter Using YOLOv5
def detect_meter(image, conf_threshold=0.25):
    """Run the YOLO model on *image* and collect the confident boxes.

    Args:
        image: Image in any form accepted by ``yolo_model``.
        conf_threshold: A box is kept only when its confidence is strictly
            greater than this value. Defaults to 0.25.

    Returns:
        A list with one entry per kept box, each being ``box.xyxy.tolist()``.
        Empty when nothing passes the threshold.
    """
    detected_meters = []
    for result in yolo_model(image):
        # A result may carry no boxes at all (attribute missing or None);
        # treat that as "no detections" instead of failing on iteration.
        boxes = getattr(result, "boxes", None)
        if boxes is None:
            continue
        for box in boxes:
            # box.conf holds a single value; compare it as a plain float.
            if float(box.conf) > conf_threshold:
                detected_meters.append(box.xyxy.tolist())
    return detected_meters
# Extract Text Using EasyOCR
def extract_text_easyocr(image):
    """Read *image* with EasyOCR and return the fragments joined by spaces."""
    # detail=0 makes readtext return only the recognised strings.
    fragments = ocr_reader.readtext(image, detail=0)
    return " ".join(fragments)
# Extract Text Using TrOCR
def extract_text_trocr(image):
    """Transcribe *image* with TrOCR and return the first decoded string."""
    # Grayscale input is expanded to RGB before it is wrapped as a PIL image.
    pil_image = Image.fromarray(convert_to_rgb(image))
    encoded = trocr_processor(images=pil_image, return_tensors="pt")
    token_ids = trocr_model.generate(encoded.pixel_values.to(device))
    decoded = trocr_processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0]
# Extract Weight Using Regex
def extract_weight(text):
    """Return the first number found in *text*, or None when there is none.

    A number is either a decimal such as ``12.5`` or a plain run of digits;
    the match is returned as a string.
    """
    first_number = re.search(r'\d+\.\d+|\d+', text)
    if first_number is None:
        return None
    return first_number.group(0)
# Apply Statistical Filtering for Stability
def filter_weight_values(weights):
    """Reduce a list of weight readings to a single value.

    Returns None for an empty list and the sole element, unchanged, for a
    one-element list. With two or more readings, the values are converted to
    float, passed through a size-3 median filter, and the last filtered
    sample is returned as a string rounded to two decimals.
    """
    if not weights:
        return None
    if len(weights) == 1:
        return weights[0]

    readings = [float(value) for value in weights]
    # NOTE(review): scipy's medfilt zero-pads the ends, so the last sample is
    # the median of the final two readings and 0 — confirm this is intended.
    smoothed = medfilt(readings, kernel_size=3)
    return str(round(smoothed[-1], 2))
# Full Processing Pipeline (Dynamic Feedback)
def process_image(image):
    """Read a weight value from a meter photo using both OCR engines.

    Args:
        image: Image array supplied by the Gradio interface, or None when no
            image was provided.

    Returns:
        The weight as a string, or a hint message when no image was given or
        neither OCR engine produced a number.
    """
    # Without this guard a missing image fails inside the OpenCV calls.
    if image is None:
        return "No image provided. Please upload an image."

    enhanced = enhance_image(image)

    # The YOLO detector is intentionally not run here: its boxes are not
    # consumed by the OCR steps below, so it would only add inference time.

    # Collect the first number each OCR engine reads from the enhanced image.
    candidates = []
    for read_text in (extract_text_easyocr, extract_text_trocr):
        weight = extract_weight(read_text(enhanced))
        if weight:
            candidates.append(weight)

    final_weight = filter_weight_values(candidates)

    # Neither engine produced a number: return a hint instead of a value.
    if not final_weight:
        return "Try adjusting image clarity or detection thresholds."
    return final_weight
# Gradio Interface
# One image input, one text output, both handled by process_image().
iface = gr.Interface(process_image, "image", "text")
# Start the web server as soon as the module is executed.
iface.launch()