Spaces:

devranx
/

PRISM2.0

Sleeping

App Files Files Community

PRISM2.0 / backend /utils.py

devranx

Initial deploy with LFS images and audio

d790e98 5 months ago

raw

history blame

3.95 kB

	import re
	import numpy as np
	import cv2
	from PIL import Image
	import random
	import torch
	import torchvision.transforms as T
	from torchvision.transforms.functional import InterpolationMode
	from difflib import SequenceMatcher
	from nltk.metrics.distance import edit_distance
	import nltk

	# Make sure the 'words' corpus and the 'punkt' tokenizer data are available
	# locally; each one is downloaded only when the lookup fails.
	for _resource_path, _package_name in (
	    ('corpora/words.zip', 'words'),
	    ('tokenizers/punkt', 'punkt'),
	):
	    try:
	        nltk.data.find(_resource_path)
	    except LookupError:
	        nltk.download(_package_name)

	from nltk.corpus import words

	def set_seed(seed=42):
	    """Seed the Python, NumPy and PyTorch random number generators.

	    Also sets the ``torch.backends.cudnn`` flags: ``deterministic`` is turned
	    on and ``benchmark`` is turned off.

	    Args:
	        seed: Value handed to every seeding function (default 42).
	    """
	    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
	        seed_fn(seed)
	    # NOTE: torch.cuda.manual_seed_all(seed) is deliberately not called here;
	    # add it when running on GPU.
	    cudnn = torch.backends.cudnn
	    cudnn.deterministic = True
	    cudnn.benchmark = False

	def build_transform(input_size=448):
	    """Build the image preprocessing pipeline.

	    The returned ``T.Compose`` applies, in order: conversion to RGB, a bicubic
	    resize to ``input_size`` x ``input_size``, conversion to a tensor, and
	    per-channel normalization with the fixed mean/std below.

	    Args:
	        input_size: Side length of the square output (default 448).

	    Returns:
	        The composed transform.
	    """
	    def _to_rgb(img):
	        return img.convert('RGB')

	    channel_mean = (0.485, 0.456, 0.406)
	    channel_std = (0.229, 0.224, 0.225)
	    target_size = (input_size, input_size)

	    steps = [
	        T.Lambda(_to_rgb),
	        T.Resize(target_size, interpolation=InterpolationMode.BICUBIC),
	        T.ToTensor(),
	        T.Normalize(mean=channel_mean, std=channel_std),
	    ]
	    return T.Compose(steps)

	def get_roi(image_path_or_obj, *roi):
	    """Crop a region of interest out of an image.

	    Args:
	        image_path_or_obj: A filesystem path (``str`` or ``os.PathLike``) or an
	            already-loaded object exposing ``convert('RGB')``, such as a PIL
	            image.
	        *roi: Four multipliers ``(x_start, y_start, x_end, y_end)``. The x
	            values are scaled by the image width and the y values by the
	            image height to obtain pixel coordinates. Values beyond the
	            fourth are ignored.

	    Returns:
	        The cropped RGB image.

	    Raises:
	        IndexError: If fewer than four ROI values are supplied.
	    """
	    import os  # local import so the block does not rely on file-level changes

	    if isinstance(image_path_or_obj, (str, os.PathLike)):
	        # Image.open is lazy and keeps the file open; the context manager
	        # releases the handle as soon as the RGB copy has been made.
	        with Image.open(image_path_or_obj) as source:
	            image = source.convert('RGB')
	    else:
	        image = image_path_or_obj.convert('RGB')

	    width, height = image.size
	    box = (
	        int(width * roi[0]),
	        int(height * roi[1]),
	        int(width * roi[2]),
	        int(height * roi[3]),
	    )
	    return image.crop(box)

	def clean_text(text):
	    """Return *text* lowercased with every character outside a-z, A-Z, 0-9 removed."""
	    alnum_only = re.sub(r'[^a-zA-Z0-9]', '', text)
	    # Whitespace has already been removed above, so strip() has nothing left
	    # to trim; it is retained to keep the call sequence unchanged.
	    return alnum_only.strip().lower()

	def are_strings_similar(str1, str2, max_distance=3, max_length_diff=2):
	    """Tell whether two strings are equal or within a small edit distance.

	    Args:
	        str1: First string.
	        str2: Second string.
	        max_distance: Largest edit distance still counted as similar.
	        max_length_diff: Largest allowed difference in length; pairs that
	            differ by more are rejected without computing the edit distance.

	    Returns:
	        True when the strings are identical, or when their lengths are close
	        enough and ``edit_distance`` does not exceed ``max_distance``.
	    """
	    # Short-circuit order matters: equality first, then the cheap length
	    # check, and only then the edit-distance computation.
	    return str1 == str2 or (
	        abs(len(str1) - len(str2)) <= max_length_diff
	        and edit_distance(str1, str2) <= max_distance
	    )

	def blur_image(image, strength):
	    """Return a Gaussian-blurred copy of *image*.

	    Args:
	        image: Image convertible with ``np.array`` (e.g. a PIL image).
	        strength: Blur factor; multiplied by 50 and truncated to an integer
	            to obtain the square kernel size.

	    Returns:
	        A new PIL image built from the blurred pixel array.
	    """
	    pixels = np.array(image)
	    # cv2.GaussianBlur needs a positive, odd kernel size: OR-ing with 1 forces
	    # oddness, and max() maps zero or negative results to 1.
	    kernel_size = max(1, int(strength * 50) | 1)
	    blurred = cv2.GaussianBlur(pixels, (kernel_size, kernel_size), 0)
	    return Image.fromarray(blurred)

	def is_blank(text, limit=15):
	    """Return True when *text* has fewer than *limit* characters."""
	    char_count = len(text)
	    return limit > char_count

	def string_similarity(a, b):
	    """Return the case-insensitive ``SequenceMatcher`` ratio of *a* and *b*.

	    Both strings are lowercased before comparison; the result is a float
	    between 0 and 1.
	    """
	    left = a.lower()
	    right = b.lower()
	    matcher = SequenceMatcher(None, left, right)
	    return matcher.ratio()

	def find_similar_substring(text, keyword, threshold=0.9):
	    """Check whether *keyword* appears in *text*, exactly or approximately.

	    Matching is case-insensitive. An exact substring hit returns immediately.
	    Otherwise every run of consecutive words in *text* with the same word
	    count as *keyword* is scored with ``string_similarity``.

	    Args:
	        text: Text to search in.
	        keyword: Word or phrase to look for.
	        threshold: Minimum similarity score that counts as a match.

	    Returns:
	        True if an exact or sufficiently similar phrase is found, else False.
	    """
	    haystack = text.lower()
	    needle = keyword.lower()

	    # Fast path: plain substring containment.
	    if needle in haystack:
	        return True

	    tokens = haystack.split()
	    window = len(needle.split())
	    last_start = len(tokens) - window

	    # Slide a window of `window` words over the text; an empty range (text
	    # shorter than the keyword) yields False.
	    return any(
	        string_similarity(' '.join(tokens[start:start + window]), needle) >= threshold
	        for start in range(last_start + 1)
	    )

	def destroy_text_roi(image, *roi_params):
	    """Obscure a rectangular region of *image* with heavy blur plus noise.

	    Args:
	        image: Image convertible with ``np.array`` (e.g. a PIL image).
	            Assumes 8-bit pixel data, since the added noise is ``uint8``.
	        *roi_params: Four multipliers ``(x1, y1, x2, y2)``. The x values are
	            scaled by the image width and the y values by the image height
	            to obtain pixel coordinates.

	    Returns:
	        A new PIL image in which the region has been blurred and noised. When
	        the region is empty the pixels are returned unchanged.

	    Raises:
	        IndexError: If fewer than four ROI values are supplied.
	    """
	    pixels = np.array(image)

	    # Only the first two axes are needed, so 2-D (single-channel) arrays are
	    # accepted as well as multi-channel ones.
	    height, width = pixels.shape[:2]
	    x1 = int(roi_params[0] * width)
	    y1 = int(roi_params[1] * height)
	    x2 = int(roi_params[2] * width)
	    y2 = int(roi_params[3] * height)

	    region = pixels[y1:y2, x1:x2]
	    if region.size == 0:
	        # Nothing to obscure, and cv2.GaussianBlur cannot process empty input.
	        return Image.fromarray(pixels)

	    blurred = cv2.GaussianBlur(region, (75, 75), 0)
	    # Noise takes the region's own shape instead of assuming three channels.
	    noise = np.random.randint(0, 50, blurred.shape, dtype=np.uint8)
	    pixels[y1:y2, x1:x2] = cv2.add(blurred, noise)
	    return Image.fromarray(pixels)

	def is_english(text):
	    """Return True when *text* contains only characters from the allowed set.

	    The allowed set is ASCII letters, whitespace, the punctuation listed in
	    the pattern, the Devanagari digits and U+0930. An empty string passes.
	    """
	    # NOTE(review): ASCII digits 0-9 are not in the allowed set while the
	    # Devanagari digits and U+0930 are -- confirm this is intended.
	    allowed_pattern = re.compile(r'^[a-zA-Z०-९\u0930\s\.,!?\-;:"\'()]*$')
	    return allowed_pattern.match(text) is not None

	_ENGLISH_VOCABULARY = None


	def _english_vocabulary():
	    """Return the lowercased NLTK word list as a frozenset, built on first use."""
	    global _ENGLISH_VOCABULARY
	    if _ENGLISH_VOCABULARY is None:
	        # Lowercase the corpus side too, so capitalised corpus entries can
	        # match the lowercased input tokens.
	        _ENGLISH_VOCABULARY = frozenset(w.lower() for w in words.words())
	    return _ENGLISH_VOCABULARY


	def is_valid_english(text):
	    """Return True when every alphanumeric token of *text* is a known word.

	    Non-alphanumeric characters are treated as separators, and tokens are
	    compared case-insensitively against the NLTK ``words`` corpus. The
	    vocabulary set is built once and reused instead of being rebuilt on
	    every call.

	    Args:
	        text: String to validate.

	    Returns:
	        True if all tokens are in the vocabulary. Text with no alphanumeric
	        tokens also returns True.
	    """
	    tokens = ''.join(c.lower() if c.isalnum() else ' ' for c in text).split()
	    if not tokens:
	        # Matches all() over an empty sequence, without loading the corpus.
	        return True
	    vocabulary = _english_vocabulary()
	    return all(token in vocabulary for token in tokens)