Committing first one
Files changed:
- .gitattributes +4 -0
- app.py +122 -0
- examples/bleyla_new.jpg +3 -0
- examples/byjd_new.jpg +3 -0
- examples/falafelcho.jpg +3 -0
- model.py +49 -0
- model_2.pth +3 -0
- requirements.txt +5 -0
.gitattributes
CHANGED

@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+model_2.pth filter=lfs diff=lfs merge=lfs -text
+examples/bleyla_new.jpg filter=lfs diff=lfs merge=lfs -text
+examples/byjd_new.jpg filter=lfs diff=lfs merge=lfs -text
+examples/falafelcho.jpg filter=lfs diff=lfs merge=lfs -text
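These four new entries route the model checkpoint and the example images through Git LFS, so the repository stores small pointer files instead of the binaries themselves; running git lfs track "model_2.pth" locally would append an equivalent line.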
app.py
ADDED

@@ -0,0 +1,122 @@
import gradio as gr
import torch
from torchvision import transforms
from PIL import Image
from gtts import gTTS
import os
import uuid
import random
import time

from model import load_face_classifier_model  # Import the model loading function

# Define the same validation transform used during training
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the model using the function from model.py
model = load_face_classifier_model(model_path='model_2.pth', num_classes=5)


def cleanup_audio_files(directory=".", prefix="prediction_", max_age_seconds=30):
    now = time.time()
    for filename in os.listdir(directory):
        if filename.startswith(prefix) and filename.endswith(".mp3"):
            filepath = os.path.join(directory, filename)
            file_age = now - os.path.getmtime(filepath)
            if file_age > max_age_seconds:
                try:
                    os.remove(filepath)
                except Exception as e:
                    print(f"Error deleting {filename}: {e}")

def classify_face_with_audio_new(image: Image.Image):
    """
    Classifies a single image (captured from camera) using a trained model
    and generates an audio file of the prediction.

    Args:
        image (PIL.Image.Image): The input image.

    Returns:
        tuple: A tuple containing the predicted class name (str)
               and the path to the generated audio file (str).
    """

    byjd_audio = ["Не ме гледай! Дай ми пауч!", "Писи Писи, Мяу Мяу", "просто мяу",
                  "мррррррррррр"]
    bleyla_audio = ["Плешкиииииитуууууууууууу", "Дай ми цун!", "Отивам при Вес Божа",
                    "А къде е прасетуу ?"]
    jenny_audio = ["Офффф гладна съм!", "Здравейте, аз съм в овулация.", "Да пием кафе на 43.12 и да ядем шницел!",
                   "Офф бе Павееел!", "Обичам Дони Донсъна."]
    sachu_audio = ["Мишо, ще ти счупя носа!", "Засъхнало аку на дупи на кучии.", "Чекии ли си правиш бе, педалче малко?",
                   "Обичам пръцкото на Сога!"]
    falafel_audio = ["Дааарлинг, къде са ми чорапите?", "Маняк, измий си краката.", "Молим те, изкъпи се!",
                     "Обичам пръцкото на Жени!"]

    if image is None:
        return "Error: Could not capture image from webcam. Please try again.", None

    # Ensure image is in RGB format and apply transform
    image = image.convert("RGB")
    image = val_transform(image).unsqueeze(0)  # Add batch dimension

    # Move the image to the device (GPU if available, otherwise CPU)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    image = image.to(device)
    model.to(device)  # Move the model to the device as well

    # Perform inference
    with torch.no_grad():
        outputs = model(image)
        # Get the predicted class index
        _, predicted_idx = torch.max(outputs.data, 1)

    # Get the predicted class name
    class_names = ['bleyla', 'byjd', 'falafel', 'jenny', 'sachu']
    predicted_class = class_names[predicted_idx.item()]

    # Generate audio
    if predicted_class == "falafel":
        text_to_speak = random.choice(falafel_audio)
    elif predicted_class == "sachu":
        text_to_speak = random.choice(sachu_audio)
    elif predicted_class == "jenny":
        text_to_speak = random.choice(jenny_audio)
    elif predicted_class == "bleyla":
        text_to_speak = random.choice(bleyla_audio)
    elif predicted_class == "byjd":
        text_to_speak = random.choice(byjd_audio)
    else:
        text_to_speak = "Unknown class"

    tts = gTTS(text=text_to_speak, lang='bg')
    audio_file = f"prediction_{uuid.uuid4()}.mp3"
    tts.save(audio_file)

    # Ensure file cleanup
    cleanup_audio_files()

    return predicted_class, audio_file

# Create the Gradio interface
interface = gr.Interface(
    fn=classify_face_with_audio_new,
    inputs=gr.Image(type="pil", label="Upload an image or use your camera"),
    outputs=[
        gr.Textbox(label="Predicted Class"),
        gr.Audio(label="Audio Pronunciation")
    ],
    title="Russian Monument Classifier",
    description="Upload an image or use your camera to classify Russian Monument Citizens.",
    examples=[["examples/bleyla_new.jpg"], ["examples/byjd_new.jpg"], ["examples/falafelcho.jpg"]]  # Examples should be a list of lists
)

# Launch the interface
if __name__ == "__main__":
    interface.launch()
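A quick way to sanity-check this pipeline without the UI is to call the handler directly. A minimal sketch, assuming model_2.pth and the bundled example images are present locally and gTTS can reach the network (the snippet itself is not part of the commit):

from PIL import Image
from app import classify_face_with_audio_new  # importing app also loads the model

label, audio_path = classify_face_with_audio_new(Image.open("examples/bleyla_new.jpg"))
print(label)       # one of: bleyla, byjd, falafel, jenny, sachu
print(audio_path)  # e.g. prediction_<uuid>.mp3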
examples/bleyla_new.jpg
ADDED
Image stored with Git LFS.

examples/byjd_new.jpg
ADDED
Image stored with Git LFS.

examples/falafelcho.jpg
ADDED
Image stored with Git LFS.
model.py
ADDED

@@ -0,0 +1,49 @@
import torch
from torchvision.models import resnet18, ResNet18_Weights
from torch import nn

def load_face_classifier_model(model_path: str = 'model_2.pth', num_classes: int = 5):
    """
    Loads the pre-trained ResNet18 model, modifies the final layer,
    loads the state dictionary, and sets the model to evaluation mode.

    Args:
        model_path (str): Path to the saved model state dictionary.
        num_classes (int): Number of classes for the final linear layer.

    Returns:
        torch.nn.Module: The loaded model in evaluation mode.
    """
    # Load the pre-trained ResNet18 model with specified weights
    weights = ResNet18_Weights.IMAGENET1K_V1
    model = resnet18(weights=weights)

    # Modify the final fully connected layer for the specified number of classes
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)

    # Load the saved state dictionary
    state_dict = torch.load(model_path, map_location=torch.device('cpu'))  # Load to CPU

    # Adjust keys to match the model (if necessary, based on how the model was saved)
    # This adjustment is based on the observation from the previous failed attempt.
    new_state_dict = {}
    for k, v in state_dict.items():
        if 'fc.1.' in k:
            new_key = k.replace('fc.1.', 'fc.')
            new_state_dict[new_key] = v
        else:
            new_state_dict[k] = v

    model.load_state_dict(new_state_dict)

    # Set the model to evaluation mode
    model.eval()

    return model

if __name__ == '__main__':
    # Example usage (for testing)
    loaded_model = load_face_classifier_model()
    print("Model loaded successfully:")
    print(loaded_model)
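Before wiring the loader into the app, it can be worth confirming the reshaped classifier head with a dummy batch. A minimal sketch, assuming model_2.pth sits in the working directory:

import torch
from model import load_face_classifier_model

model = load_face_classifier_model()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))  # one normalized 224x224 RGB image
print(logits.shape)  # expected: torch.Size([1, 5])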
model_2.pth
ADDED

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:84140f841ffe511330eb0a18b96bf665b341f7759176d8a6885787d7aa2e2a1d
size 44793355
requirements.txt
ADDED

@@ -0,0 +1,5 @@
gradio==3.1.4
torch==2.8.0
torchvision==0.23.0
Pillow
gtts
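To run the Space locally, the usual pip install -r requirements.txt applies. Note that torch 2.8.0 and torchvision 0.23.0 are pinned as a matching pair, Pillow and gtts float, and gradio is pinned to the old 3.x line, so the gr.Image / gr.Audio component arguments in app.py follow the 3.x API.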