Spaces:

syedfaisalabrar
/

License_Classification

Sleeping

App Files Files Community

License_Classification / app.py

syedfaisalabrar

Update app.py

3d14c4c verified about 1 year ago

raw

history blame

4.55 kB

	import gradio as gr
	import torch
	import cv2
	import os
	import numpy as np
	from PIL import Image, ImageEnhance
	from ultralytics import YOLO
	from decord import VideoReader, cpu
	from torchvision.transforms.functional import InterpolationMode
	from transformers import AutoModel, AutoTokenizer
	from backPrompt import main as main_b
	from frontPrompt import main as main_f
	import sentencepiece as spm

	# --- Detector -------------------------------------------------------------
	# YOLO weights are loaded from a path relative to the working directory.
	model_path = "best.pt"
	modelY = YOLO(model_path)

	# --- Vision-language model --------------------------------------------------
	cache_folder = "./.cache"
	# NOTE(review): `transformers` is already imported by the time this variable
	# is set, so it may have no effect on the library's default cache location;
	# the explicit `cache_dir=` arguments below are what is relied on. Confirm
	# before removing either.
	os.environ["TRANSFORMERS_CACHE"] = "./.cache"
	path = "OpenGVLab/InternVL2_5-2B"

	# Half-precision weights are only requested when CUDA is present.
	_weights_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

	# Loaded once at import time so every request reuses the same objects.
	# NOTE(review): the model is always moved to the CPU below even though the
	# dtype choice and `use_flash_attn=True` look GPU-oriented - confirm intent.
	model = AutoModel.from_pretrained(
	    path,
	    cache_dir=cache_folder,
	    torch_dtype=_weights_dtype,
	    low_cpu_mem_usage=True,
	    use_flash_attn=True,
	    trust_remote_code=True,
	)
	model = model.eval().cpu()

	tokenizer = AutoTokenizer.from_pretrained(
	    path,
	    cache_dir=cache_folder,
	    trust_remote_code=True,
	    use_fast=False,
	)


	from PIL import Image, ImageEnhance
	import numpy as np
	import torch

	def preprocessing(image):
	    """Enhance an image and return it as a channel-first float tensor.

	    Three fixed enhancements are applied in order: sharpness x2.0,
	    contrast x1.5 and brightness x0.8. The image is never resized or
	    cropped.

	    Args:
	        image: A PIL image, or anything ``np.array`` can turn into an array
	            that ``Image.fromarray`` accepts.

	    Returns:
	        A ``torch`` float tensor laid out as ``[C, H, W]`` whose values are
	        the enhanced pixel values divided by 255.
	    """
	    # Ensure we are working with a PIL image.
	    if not isinstance(image, Image.Image):
	        image = Image.fromarray(np.array(image))

	    # Single-band images (grayscale, palette, ...) turn into 2-D arrays, on
	    # which the channel-first permute below raises. Promote them to RGB so
	    # the function always yields a [C, H, W] tensor; multi-band inputs are
	    # left exactly as they were.
	    if len(image.getbands()) == 1:
	        image = image.convert("RGB")

	    image = ImageEnhance.Sharpness(image).enhance(2.0)   # increase sharpness
	    image = ImageEnhance.Contrast(image).enhance(1.5)    # increase contrast
	    image = ImageEnhance.Brightness(image).enhance(0.8)  # reduce brightness

	    # H x W x C pixel array -> C x H x W tensor, scaled by 1/255.
	    pixels = np.array(image)
	    image_tensor = torch.tensor(pixels).permute(2, 0, 1).float() / 255.0

	    return image_tensor





	def imageRotation(image):
	    """Placeholder rotation step: hand the input back untouched.

	    Args:
	        image: Any image object.

	    Returns:
	        The very same object that was passed in.
	    """
	    return image


	def detect_document(image):
	    """Detect the front and back of a document with the YOLO model.

	    Every detection is drawn onto a copy of the input (green box plus a
	    "<class> <confidence>" caption) and recorded.

	    Args:
	        image: Image data convertible with ``np.array``.
	            NOTE(review): presumably an H x W x 3 uint8 image, as the
	            OpenCV drawing calls and ``Image.fromarray`` need - confirm.

	    Returns:
	        A 3-tuple ``(annotated_image, labels, bounding_boxes)``:
	        * ``annotated_image`` - PIL image with the detections drawn on it.
	        * ``labels`` - one caption per detection, followed by a
	          ``"Missing: ..."`` entry when "front" and/or "back" was not found.
	        * ``bounding_boxes`` - ``(x1, y1, x2, y2, class_name, conf)`` tuples
	          with integer pixel coordinates and ``conf`` as a Python float.
	    """
	    # np.array copies, so the drawing below never touches the caller's data.
	    image = np.array(image)
	    results = modelY(image, conf=0.85)

	    detected_classes = set()
	    labels = []
	    bounding_boxes = []

	    for result in results:
	        for box in result.boxes:
	            x1, y1, x2, y2 = map(int, box.xyxy[0])
	            # Plain float rather than a tensor element, so the returned
	            # tuples hold ordinary Python numbers.
	            conf = float(box.conf[0])
	            class_name = modelY.names[int(box.cls[0])]

	            detected_classes.add(class_name)
	            label = f"{class_name} {conf:.2f}"
	            labels.append(label)
	            bounding_boxes.append((x1, y1, x2, y2, class_name, conf))

	            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
	            # Keep the caption baseline inside the image for boxes that
	            # touch the top edge; otherwise it would be drawn out of view.
	            caption_y = max(y1 - 10, 15)
	            cv2.putText(image, label, (x1, caption_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

	    # Sorted so the message is identical from run to run (set iteration
	    # order of strings is not stable across interpreter launches).
	    missing_classes = sorted({"front", "back"} - detected_classes)
	    if missing_classes:
	        labels.append(f"Missing: {', '.join(missing_classes)}")

	    return Image.fromarray(image), labels, bounding_boxes


	def crop_image(image, bounding_boxes):
	    """Crop the detected regions out of the image, one crop per class.

	    When a class appears more than once, the detection with the highest
	    confidence is kept (previously whichever came last silently won).
	    Boxes are clamped to the image bounds and boxes with no area are
	    skipped, so negative or out-of-range coordinates cannot wrap around or
	    produce empty crops.

	    Args:
	        image: Image data convertible with ``np.array`` and indexable as
	            ``[row, column]``.
	        bounding_boxes: Iterable of
	            ``(x1, y1, x2, y2, class_name, conf)`` tuples.

	    Returns:
	        Dict mapping each class name to a PIL image of its crop.
	    """
	    image = np.array(image)
	    height, width = image.shape[:2]

	    cropped_images = {}
	    best_conf = {}

	    for (x1, y1, x2, y2, class_name, conf) in bounding_boxes:
	        # Clamp to the image: a negative start index would otherwise be
	        # interpreted by NumPy as counting from the far edge.
	        left, right = max(0, x1), min(width, x2)
	        top, bottom = max(0, y1), min(height, y2)
	        if right <= left or bottom <= top:
	            continue

	        score = float(conf)
	        if class_name in best_conf and score <= best_conf[class_name]:
	            continue

	        best_conf[class_name] = score
	        cropped_images[class_name] = Image.fromarray(image[top:bottom, left:right])

	    return cropped_images


	def vision_ai_api(image, doc_type):
	    """Run the side-specific prompt pipeline on a cropped document image.

	    Args:
	        image: The cropped image for one side of the document.
	        doc_type: ``"front"`` or ``"back"``; selects which prompt module's
	            ``main`` is called with the shared model and tokenizer.

	    Returns:
	        Whatever the selected prompt module's ``main`` returns.

	    Raises:
	        ValueError: If ``doc_type`` is neither ``"front"`` nor ``"back"``
	            (previously this surfaced as an ``UnboundLocalError``).
	    """
	    if doc_type == "front":
	        return main_f(image, model, tokenizer)
	    if doc_type == "back":
	        return main_b(image, model, tokenizer)

	    raise ValueError(
	        f"Unsupported doc_type {doc_type!r}; expected 'front' or 'back'."
	    )


	def _as_uint8_hwc(image):
	    """Convert a channel-first tensor back to an H x W x C uint8 array.

	    Non-tensor inputs are returned as a NumPy array without rescaling.
	    Floating-point tensors are assumed to hold pixel values divided by 255
	    (which is what ``preprocessing`` produces) and are scaled back.
	    """
	    if not isinstance(image, torch.Tensor):
	        return np.asarray(image)

	    pixels = image.detach().cpu()
	    if pixels.ndim == 3:
	        pixels = pixels.permute(1, 2, 0)  # C x H x W -> H x W x C
	    if pixels.is_floating_point():
	        # round() before the cast so float error cannot drop a value by one.
	        pixels = (pixels * 255.0).round().clamp(0, 255)
	    # OpenCV drawing needs a C-contiguous buffer; permute only changes strides.
	    return np.ascontiguousarray(pixels.to(torch.uint8).numpy())


	def predict(image):
	    """Pipeline: Preprocess -> Detect -> Crop -> Vision AI API.

	    Args:
	        image: The uploaded image as delivered by the Gradio "image" input.

	    Returns:
	        A 3-tuple ``(single_image, labels, api_results)``:
	        * ``single_image`` - the "front" crop if present, else the "back"
	          crop, else the annotated full image.
	        * ``labels`` - the label list produced by ``detect_document``.
	        * ``api_results`` - ``{"front": ..., "back": ...}`` with ``None``
	          for a side that was not detected.
	    """
	    processed_image = preprocessing(image)
	    rotated_image = imageRotation(processed_image)  # placeholder for rotation

	    # `preprocessing` yields a float C x H x W tensor, but detection and
	    # cropping index the data as an H x W x C image and build PIL images
	    # from it. Convert once here so both stages see the same pixel array.
	    detection_input = _as_uint8_hwc(rotated_image)

	    detected_image, labels, bounding_boxes = detect_document(detection_input)
	    cropped_images = crop_image(detection_input, bounding_boxes)

	    # Query the vision model separately for each side that was found.
	    api_results = {
	        side: (
	            vision_ai_api(cropped_images[side], side)
	            if side in cropped_images
	            else None
	        )
	        for side in ("front", "back")
	    }

	    # Prefer the front crop, then the back crop, then the annotated image.
	    single_image = next(
	        (cropped_images[side] for side in ("front", "back") if side in cropped_images),
	        detected_image,
	    )
	    return single_image, labels, api_results


	# Gradio front end: one image in; an image, a text field and a JSON view
	# out, matching the three values returned by `predict`.
	_interface_options = {
	    "fn": predict,
	    "inputs": "image",
	    "outputs": ["image", "text", "json"],
	    "title": "License Field Detection (Front & Back Card)",
	}
	iface = gr.Interface(**_interface_options)

	# Start serving as soon as the module is executed.
	iface.launch()