Spaces:

syedfaisalabrar
/

License_Classification

Sleeping

App Files Files Community

License_Classification / app.py

syedfaisalabrar

Update app.py

c68f3a4 verified about 1 year ago

raw

history blame

4.21 kB

	import gradio as gr
	import torch
	import cv2
	import os
	import numpy as np
	from PIL import Image, ImageEnhance
	from ultralytics import YOLO
	from decord import VideoReader, cpu
	from torchvision.transforms.functional import InterpolationMode
	from transformers import AutoModel, AutoTokenizer
	from backPrompt import main as main_b
	from frontPrompt import main as main_f

	# YOLO detector for the document sides; weights come from the relative path below.
	model_path = "best.pt"
	modelY = YOLO(model_path)

	# InternVL checkpoint handed to the front/back prompt helpers.
	path = "OpenGVLab/InternVL2_5-4B"

	# bfloat16 when CUDA is available, float32 otherwise.
	if torch.cuda.is_available():
	    _torch_dtype = torch.bfloat16
	else:
	    _torch_dtype = torch.float32

	_load_options = {
	    "torch_dtype": _torch_dtype,
	    # "load_in_8bit": True,
	    "low_cpu_mem_usage": True,
	    "use_flash_attn": True,
	    "trust_remote_code": True,
	}
	model = AutoModel.from_pretrained(path, **_load_options)
	model = model.eval()  # inference mode
	tokenizer = AutoTokenizer.from_pretrained(
	    path,
	    trust_remote_code=True,
	    use_fast=False,
	)

	def preprocessing(image, width=800):
	    """Apply three enhancement filters, then resize to a fixed width.

	    Sharpness is doubled, contrast is raised by 50% and brightness is reduced
	    to 80%, in that order. The result is resized to ``width`` pixels wide, with
	    the height following the original aspect ratio.

	    Args:
	        image: Anything ``np.array`` can convert into an array accepted by
	            ``Image.fromarray`` (e.g. a PIL image or an image array).
	        width: Target width in pixels. Defaults to 800.

	    Returns:
	        The enhanced and resized PIL image.
	    """
	    image = Image.fromarray(np.array(image))

	    # A factor of 1.0 leaves the image unchanged; above strengthens, below weakens.
	    enhancement_steps = (
	        (ImageEnhance.Sharpness, 2.0),   # increase sharpness
	        (ImageEnhance.Contrast, 1.5),    # increase contrast
	        (ImageEnhance.Brightness, 0.8),  # reduce brightness
	    )
	    for enhancer, factor in enhancement_steps:
	        image = enhancer(image).enhance(factor)

	    aspect_ratio = image.height / image.width
	    # Truncation gives 0 for extremely wide inputs, and resize() needs a
	    # positive size, so never go below one pixel.
	    height = max(1, int(width * aspect_ratio))
	    return image.resize((width, height))


	def imageRotation(image):
	    """Return the image in landscape orientation.

	    An image that is taller than it is wide is rotated by 90 degrees with the
	    canvas expanded to fit; any other image is returned unchanged.
	    """
	    is_portrait = image.height > image.width
	    return image.rotate(90, expand=True) if is_portrait else image


	def detect_document(image):
	    """Detect the front and back of the document using YOLO.

	    Args:
	        image: Image convertible with ``np.array`` (e.g. a PIL image).

	    Returns:
	        A tuple ``(annotated, labels, bounding_boxes)`` where
	        ``annotated`` is a PIL image with every detection drawn on it,
	        ``labels`` is a list of ``"<class> <confidence>"`` strings, followed by
	        one ``"Missing: ..."`` entry when "front" and/or "back" was not found,
	        and ``bounding_boxes`` is a list of
	        ``(x1, y1, x2, y2, class_name, confidence)`` tuples.
	    """
	    # np.array copies, so drawing below never touches the caller's image.
	    image = np.array(image)
	    # conf=0.85 is passed to the detector as its confidence threshold.
	    results = modelY(image, conf=0.85)

	    detected_classes = set()
	    labels = []
	    bounding_boxes = []
	    box_color = (0, 255, 0)

	    for result in results:
	        for box in result.boxes:
	            x1, y1, x2, y2 = map(int, box.xyxy[0])
	            # Store a plain float instead of the detector's tensor element so
	            # callers can compare or serialize the confidence directly.
	            conf = float(box.conf[0])
	            class_name = modelY.names[int(box.cls[0])]

	            detected_classes.add(class_name)
	            label = f"{class_name} {conf:.2f}"
	            labels.append(label)
	            bounding_boxes.append((x1, y1, x2, y2, class_name, conf))

	            cv2.rectangle(image, (x1, y1), (x2, y2), box_color, 2)
	            cv2.putText(
	                image,
	                label,
	                (x1, y1 - 10),
	                cv2.FONT_HERSHEY_SIMPLEX,
	                0.5,
	                box_color,
	                2,
	            )

	    missing_classes = {"front", "back"} - detected_classes
	    if missing_classes:
	        # Sorted: set iteration order of strings is not stable across runs.
	        labels.append(f"Missing: {', '.join(sorted(missing_classes))}")

	    return Image.fromarray(image), labels, bounding_boxes


	def crop_image(image, bounding_boxes):
	    """Crop the detected bounding boxes from the image, one crop per class.

	    When a class appears more than once, the box with the highest confidence
	    is used (the earliest box wins ties). Boxes are clamped to the image
	    bounds, and boxes left without any area are skipped.

	    Args:
	        image: Image convertible with ``np.array``.
	        bounding_boxes: Iterable of
	            ``(x1, y1, x2, y2, class_name, confidence)`` tuples.

	    Returns:
	        Dict mapping each class name to its cropped PIL image.
	    """
	    pixels = np.array(image)
	    img_height, img_width = pixels.shape[:2]

	    best_boxes = {}
	    for x1, y1, x2, y2, class_name, conf in bounding_boxes:
	        # Negative indices would wrap around when slicing, so clamp first.
	        x1, y1 = max(0, x1), max(0, y1)
	        x2, y2 = min(img_width, x2), min(img_height, y2)
	        if x2 <= x1 or y2 <= y1:
	            continue  # nothing left to crop

	        score = float(conf)
	        if class_name not in best_boxes or score > best_boxes[class_name][0]:
	            best_boxes[class_name] = (score, (x1, y1, x2, y2))

	    return {
	        class_name: Image.fromarray(pixels[y1:y2, x1:x2])
	        for class_name, (_, (x1, y1, x2, y2)) in best_boxes.items()
	    }


	def vision_ai_api(image, doc_type):
	    """Run the vision-language extraction for one side of the document.

	    Args:
	        image: Image of the document side to analyse.
	        doc_type: ``"front"`` or ``"back"``; selects which prompt helper runs.

	    Returns:
	        Whatever the selected prompt helper returns.

	    Raises:
	        ValueError: If ``doc_type`` is neither ``"front"`` nor ``"back"``.
	    """
	    if doc_type == "front":
	        return main_f(image, model, tokenizer)
	    if doc_type == "back":
	        return main_b(image, model, tokenizer)
	    # Fail with a clear message instead of returning an unbound local.
	    raise ValueError(
	        f"Unsupported doc_type: {doc_type!r} (expected 'front' or 'back')"
	    )


	def predict(image):
	    """Pipeline: Preprocess -> Rotate -> Detect -> Crop -> Vision AI API.

	    Args:
	        image: The submitted image, or ``None`` when nothing was submitted.

	    Returns:
	        A tuple ``(single_image, labels, api_results)`` where
	        ``single_image`` is the front crop if present, else the back crop,
	        else the annotated full image (``None`` when no image was given),
	        ``labels`` is the list of detection labels, and
	        ``api_results`` is a dict with the keys ``"front"`` and ``"back"``,
	        each holding that side's extraction result or ``None``.
	    """
	    sides = ("front", "back")

	    # Nothing to process: report it instead of failing inside preprocessing.
	    if image is None:
	        return None, ["No image provided"], {side: None for side in sides}

	    processed_image = preprocessing(image)
	    rotated_image = imageRotation(processed_image)
	    detected_image, labels, bounding_boxes = detect_document(rotated_image)

	    # Crop from the un-annotated image so the drawn boxes stay out of the crops.
	    cropped_images = crop_image(rotated_image, bounding_boxes)

	    # Call Vision AI separately for front and back if detected.
	    api_results = {
	        side: (
	            vision_ai_api(cropped_images[side], side)
	            if side in cropped_images
	            else None
	        )
	        for side in sides
	    }

	    single_image = next(
	        (cropped_images[side] for side in sides if side in cropped_images),
	        detected_image,
	    )
	    return single_image, labels, api_results


	# Web UI around predict: one image in; an image, a text field and a JSON view out.
	iface = gr.Interface(
	    predict,
	    "image",
	    ["image", "text", "json"],
	    title="License Field Detection (Front & Back Card)",
	)

	# Start serving as soon as this module is executed.
	iface.launch()