Manith Marapperuma committed on
Commit
13e0611
·
1 Parent(s): 998536c

init commit

Browse files
Files changed (3) hide show
  1. app.py +63 -0
  2. model_v1.0.pt +3 -0
  3. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import torch
4
+ from torchvision import transforms
5
+ from facenet_pytorch import MTCNN
6
+ import matplotlib.pyplot as plt
7
+
8
+ # Function to load the ViT model and MTCNN
9
def load_model_and_mtcnn(model_path):
    """Load the serialized model and build an MTCNN face detector.

    The compute device is CUDA when ``torch.cuda.is_available()`` reports
    a GPU, otherwise the CPU. Both the model and the detector are placed
    on that device.

    Args:
        model_path: Path of the file handed to ``torch.load``.

    Returns:
        A ``(model, device, mtcnn)`` tuple.
    """
    # Resolve the target device once and reuse it everywhere below.
    target = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # NOTE(review): torch.load unpickles the file, so only load trusted
    # checkpoints. Recent PyTorch releases reportedly default to
    # weights_only=True, which may reject a fully pickled model object --
    # confirm against the torch version this app is deployed with.
    model = torch.load(model_path, map_location=target)
    model.to(target)

    # keep_all=True asks the detector to report every face it finds.
    detector = MTCNN(keep_all=True, device=target)
    return model, target, detector
15
+
16
+ # Function to preprocess the image and return both the tensor and the final PIL image for display
17
def preprocess_image(image, mtcnn, device):
    """Crop the first detected face (if any) and build the model input.

    Face detection is best-effort: if no face is found, or detection
    raises, the full original image is used instead.

    Args:
        image: RGB ``PIL.Image`` to classify.
        mtcnn: Face detector exposing ``detect(image) -> (boxes, probs)``.
        device: Torch device the returned tensor is moved to.

    Returns:
        ``(image_tensor, processed_image)`` where ``image_tensor`` has a
        leading batch dimension of 1 and ``processed_image`` is the PIL
        image (face crop or original) that was fed to the transform.
    """
    processed_image = image  # Fall back to the whole image by default
    try:
        # Use detect() rather than calling the detector directly: the
        # direct call returns normalized face tensors (and has no
        # ``return_image`` keyword), which cannot go through ToTensor()
        # below. Bounding boxes let us crop the PIL image ourselves.
        boxes, _ = mtcnn.detect(image)
        if boxes is not None and len(boxes) > 0:
            left, top, right, bottom = (int(v) for v in boxes[0])
            width, height = image.size
            # Clamp to the image bounds; detected boxes may spill outside.
            left, top = max(0, left), max(0, top)
            right, bottom = min(width, right), min(height, bottom)
            # Skip degenerate boxes that would produce an empty crop.
            if right > left and bottom > top:
                processed_image = image.crop((left, top, right, bottom))
    except Exception as e:
        # Deliberate best-effort: report the problem and classify the
        # uncropped image instead of aborting the request.
        st.write(f"Exception in face detection: {e}")
        processed_image = image

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    image_tensor = transform(processed_image).to(device)
    image_tensor = image_tensor.unsqueeze(0)  # Add a batch dimension
    return image_tensor, processed_image
37
+
38
+ # Function for inference
39
def predict(image_tensor, model, device):
    """Run the model on one batch and return class predictions.

    Args:
        image_tensor: Input batch; moved to ``device`` before the forward
            pass.
        model: Callable torch module. Its output may be either a plain
            logits tensor or an object exposing a ``logits`` attribute.
        device: Torch device on which inference is run.

    Returns:
        ``(predicted_class, probabilities)``: the argmax index per batch
        row and the softmax probabilities over dimension 1.
    """
    model.eval()
    with torch.no_grad():
        # Make sure the input lives on the requested device; this is a
        # no-op when the caller has already moved it.
        outputs = model(image_tensor.to(device))
        # Accept both wrapper-style outputs (``.logits``) and raw tensors.
        logits = getattr(outputs, "logits", outputs)
        probabilities = torch.nn.functional.softmax(logits, dim=1)
        predicted_class = torch.argmax(probabilities, dim=1)
    return predicted_class, probabilities
46
+
47
+ # Streamlit UI
48
st.title("Face Detection and Classification with ViT")
st.write("Upload an image, and the model will detect faces and classify the image.")

# Checkpoint shipped alongside this app; adjust if the model file is
# stored under a different name or location.
model_path = "model_v1.0.pt"


@st.cache_resource
def _load_cached(path):
    """Load the model and detector once and reuse them across reruns.

    Streamlit re-executes the script on every interaction; caching
    avoids reloading the checkpoint each time.
    """
    return load_model_and_mtcnn(path)


model, device, mtcnn = _load_cached(model_path)

uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
if uploaded_file is not None:
    # Force three channels so the normalization in preprocessing applies.
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption='Uploaded Image', use_column_width=True)
    image_tensor, final_image = preprocess_image(image, mtcnn, device)
    predicted_class, probabilities = predict(image_tensor, model, device)

    st.write(f"Predicted class: {predicted_class.item()}")
    # Display the image that was actually fed to the model
    st.image(final_image, caption='Processed Image', use_column_width=True)
model_v1.0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49b6bc053a64219bc599a332d053e43a46648d761878e59219924dcf7144d07c
3
+ size 343321862
requirements.txt ADDED
File without changes