# Pafkun333's picture
# Committing first one
# aeaf3f3
import gradio as gr
import torch
from torchvision import transforms
from PIL import Image
from gtts import gTTS
import os
import uuid
import random
import time
from model import load_face_classifier_model # Import the model loading function
# Validation-time preprocessing: must mirror the pipeline used during
# training so inference sees identically sized and normalized inputs.
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]

val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
])

# Load the trained 5-class face classifier from its checkpoint (model.py).
model = load_face_classifier_model(model_path='model_2.pth', num_classes=5)
def cleanup_audio_files(directory=".", prefix="prediction_", max_age_seconds=30):
    """Delete stale generated speech files.

    Removes every ``<prefix>*.mp3`` in *directory* whose modification time
    is more than *max_age_seconds* seconds in the past, so the working
    directory does not accumulate one MP3 per prediction.

    Args:
        directory (str): Directory to scan for audio files.
        prefix (str): Filename prefix marking files as ours to delete.
        max_age_seconds (int | float): Minimum age before a file is removed.
    """
    now = time.time()
    for filename in os.listdir(directory):
        if filename.startswith(prefix) and filename.endswith(".mp3"):
            filepath = os.path.join(directory, filename)
            file_age = now - os.path.getmtime(filepath)
            if file_age > max_age_seconds:
                try:
                    os.remove(filepath)
                # Narrowed from `Exception`: os.remove raises OSError.
                except OSError as e:
                    # Bug fix: the message previously printed the literal
                    # "(unknown)" instead of the offending path.
                    print(f"Error deleting {filepath}: {e}")
def classify_face_with_audio_new(image: Image.Image):
    """Classify a face image and synthesize a spoken catchphrase.

    Runs the globally loaded classifier on *image*, picks a random
    Bulgarian phrase associated with the predicted person, and renders
    it to an MP3 with gTTS.

    Args:
        image (PIL.Image.Image): The input image (upload or webcam
            capture), or None when capture failed.

    Returns:
        tuple: (predicted class name or error message (str),
        path to the generated audio file (str) or None on error).
    """
    # Per-class phrase pools; one entry is chosen at random per prediction.
    # Replaces the former five-branch if/elif chain with a single lookup.
    phrases = {
        "byjd": ["Не ме гледай! Дай ми пауч!", "Писи Писи, Мяу Мяу", "просто мяу",
                 "мррррррррррр"],
        "bleyla": ["Плешкиииииитуууууууууууу", "Дай ми цун!", "Отивам при Вес Божа",
                   "А къде е прасетуу ?"],
        "jenny": ["Офффф гладна съм!", "Здравейте, аз съм в овулация.", "Да пием кафе на 43.12 и да ядем шницел!",
                  "Офф бе Павееел!", "Обичам Дони Донсъна."],
        "sachu": ["Мишо, ще ти счупя носа!", "Засъхнало аку на дупи на кучии.", "Чекии ли си правиш бе, педалче малко?",
                  "Обичам пръцкото на Сога!"],
        "falafel": ["Дааарлинг, къде са ми чорапите?", "Маняк, измий си краката.", "Молим те, изкъпи се!",
                    "Обичам пръцкото на Жени!"],
    }
    if image is None:
        return "Error: Could not capture image from webcam. Please try again.", None
    # Preprocess exactly as at validation time, then add a batch dimension.
    image = image.convert("RGB")
    batch = val_transform(image.convert("RGB")).unsqueeze(0)
    # Run on GPU when available; the model is moved alongside the input.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batch = batch.to(device)
    model.to(device)
    with torch.no_grad():
        outputs = model(batch)
    # Deprecated `.data` access removed; torch.max on the tensor directly.
    _, predicted_idx = torch.max(outputs, 1)
    # Must match the (alphabetical) label order used during training.
    class_names = ['bleyla', 'byjd', 'falafel', 'jenny', 'sachu']
    predicted_class = class_names[predicted_idx.item()]
    # Unknown classes fall back to a single-item pool, preserving the
    # original else-branch text.
    text_to_speak = random.choice(phrases.get(predicted_class, ["Unknown class"]))
    tts = gTTS(text=text_to_speak, lang='bg')
    # Unique filename so concurrent requests never clobber each other.
    audio_file = f"prediction_{uuid.uuid4()}.mp3"
    tts.save(audio_file)
    # Prune MP3s older than 30s; the file just written is too fresh to be hit.
    cleanup_audio_files()
    return predicted_class, audio_file
# Gradio UI wiring: a single image input (upload or camera) mapped to two
# outputs — the predicted label and the synthesized catchphrase audio.
# Example images are expected under ./examples; each example is a one-item
# list because the interface has one input component.
_EXAMPLE_IMAGES = [
    ["examples/bleyla_new.jpg"],
    ["examples/byjd_new.jpg"],
    ["examples/falafelcho.jpg"],
]

interface = gr.Interface(
    fn=classify_face_with_audio_new,
    inputs=gr.Image(type="pil", label="Upload an image or use your camera"),
    outputs=[
        gr.Textbox(label="Predicted Class"),
        gr.Audio(label="Audio Pronunciation"),
    ],
    title="Russian Monument Classifier",
    description="Upload an image or use your camera to classify Russian Monument Citizens.",
    examples=_EXAMPLE_IMAGES,
)

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    interface.launch()