clementBE committed on
Commit
92e5b44
·
verified ·
1 Parent(s): d00da7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +273 -250
app.py CHANGED
@@ -1,254 +1,277 @@
1
- import gradio as gr
2
- import requests
3
  import os
 
4
  import tempfile
5
- import shutil
6
- import urllib.request
7
- import isodate
8
- import datetime
9
-
10
- # --- IMPORTANT: Ensure this environment variable is set ---
11
- API_KEY = os.getenv("YOUTUBE_API_KEY")
12
- BASE_URL = "https://www.googleapis.com/youtube/v3"
13
-
14
- # -----------------------
15
- # API Usage Tracker
16
- # -----------------------
17
- API_USAGE = {"units": 0}
18
-
19
def api_get(url, cost, **kwargs):
    """Perform a GET request, charging *cost* units to the global quota tracker."""
    API_USAGE["units"] += cost
    return requests.get(url, **kwargs)
24
-
25
- # -----------------------
26
- # Helper Functions (Simplified)
27
- # -----------------------
28
def parse_duration(duration_str):
    """Convert an ISO-8601 duration string to whole seconds; 0 on any parse error."""
    try:
        total = isodate.parse_duration(duration_str).total_seconds()
    except Exception:
        return 0
    return int(total)
33
-
34
def get_channel_info(channel_id):
    """Fetch the channel snippet (including title) for *channel_id* (API cost: 1).

    Returns the snippet dict, or None when the request fails or the channel
    does not exist.
    """
    r = api_get(f"{BASE_URL}/channels?part=snippet&id={channel_id}&key={API_KEY}", 1)
    if r.status_code != 200:
        return None
    # Parse the response body once instead of calling r.json() three times.
    items = r.json().get('items') or []
    return items[0]['snippet'] if items else None
40
-
41
def extract_channel_id(url: str):
    """Resolve a YouTube channel URL (/channel/, /@handle, /user/) to a channel ID.

    Returns None when the URL form is unrecognized or the lookup fails.
    The /@handle form costs 100 quota units (search); /user/ costs 1.
    """
    if "channel/" in url:
        # Channel ID is embedded directly in the URL.
        return url.split("channel/")[1].split("/")[0]
    elif "/@" in url:
        handle = url.split("/@")[1].split("/")[0]
        resp = api_get(f"{BASE_URL}/search?part=snippet&type=channel&q={handle}&key={API_KEY}", 100)
        if resp.status_code != 200:
            return None
        found = resp.json().get("items")
        if found:
            return found[0]["snippet"]["channelId"]
    elif "user/" in url:
        username = url.split("user/")[1].split("/")[0]
        resp = api_get(f"{BASE_URL}/channels?part=id&forUsername={username}&key={API_KEY}", 1)
        if resp.status_code != 200:
            return None
        found = resp.json().get("items")
        if found:
            return found[0]["id"]
    return None
60
-
61
def get_uploads_playlist(channel_id):
    """Return the channel's 'uploads' playlist ID (API cost: 1).

    Raises KeyError/IndexError when the channel is absent from the response;
    the caller wraps this call in try/except.
    """
    data = api_get(f"{BASE_URL}/channels?part=contentDetails&id={channel_id}&key={API_KEY}", 1).json()
    return data['items'][0]['contentDetails']['relatedPlaylists']['uploads']
65
-
66
- # -----------------------
67
- # Fetch and Filter Video IDs
68
- # -----------------------
69
def filter_video_ids(video_ids, mode="videos"):
    """Filter video IDs by duration (API cost: 1 unit per batch of 50 IDs).

    mode: "videos" keeps durations >= 60s, "shorts" keeps < 60s,
    "all" keeps everything the API returns contentDetails for.
    """
    kept = []
    for start in range(0, len(video_ids), 50):
        chunk = video_ids[start:start + 50]
        data = api_get(f"{BASE_URL}/videos?part=contentDetails&id={','.join(chunk)}&key={API_KEY}", 1).json()

        for item in data.get("items", []):
            if 'contentDetails' not in item:
                continue
            seconds = parse_duration(item["contentDetails"]["duration"])
            vid = item["id"]

            if mode == "all":
                kept.append(vid)
            elif mode == "videos" and seconds >= 60:
                kept.append(vid)
            elif mode == "shorts" and seconds < 60:
                kept.append(vid)

    return kept
91
-
92
def get_playlist_video_ids(playlist_id, max_videos=50, mode="videos"):
    """Page through a playlist, filtering by *mode*, until *max_videos* IDs are found.

    Stops early when the playlist runs out of pages or items.
    """
    collected = []
    page_token = None

    while len(collected) < max_videos:
        url = f"{BASE_URL}/playlistItems?part=snippet&playlistId={playlist_id}&maxResults=50&key={API_KEY}"
        if page_token:
            url += f"&pageToken={page_token}"

        data = api_get(url, 1).json()
        raw = [entry["snippet"]["resourceId"]["videoId"] for entry in data.get("items", [])]
        matching = filter_video_ids(raw, mode=mode)

        # Take only as many as still fit under the cap.
        collected.extend(matching[:max_videos - len(collected)])

        page_token = data.get("nextPageToken")
        if not page_token or not raw:
            break

    return collected[:max_videos]
114
-
115
def get_live_video_ids(channel_id, max_videos=50):
    """Fetch IDs of completed live streams for a channel (API cost: 100).

    The search.list endpoint accepts maxResults only in the range 0-50, so the
    request is clamped; at most min(max_videos, 50) IDs are returned. Previously
    the UI slider could pass up to 100, which the API rejects with HTTP 400.
    """
    limit = min(max_videos, 50)  # search.list caps maxResults at 50
    url = f"{BASE_URL}/search?part=id&channelId={channel_id}&eventType=completed&type=video&maxResults={limit}&key={API_KEY}"
    r = api_get(url, 100).json()
    return [item["id"]["videoId"] for item in r.get("items", [])]
123
-
124
- # -----------------------
125
- # Thumbnails Download and Prep
126
- # -----------------------
127
def download_thumbnails(video_ids):
    """Download the best available thumbnail for each video (API cost: 1 per 50 IDs).

    Returns (temp_directory, list_of_downloaded_jpg_paths). Filenames embed a
    sanitized video title plus the video ID for readability in gr.Files.
    """
    tmp_dir = tempfile.mkdtemp()
    thumb_paths = []
    for start in range(0, len(video_ids), 50):
        chunk = video_ids[start:start + 50]
        data = api_get(f"{BASE_URL}/videos?part=snippet&id={','.join(chunk)}&key={API_KEY}", 1).json()
        for item in data.get("items", []):
            if 'snippet' not in item:
                continue
            snippet = item['snippet']
            thumbnails = snippet['thumbnails']
            # Prefer the highest resolution variant that exists for this video.
            best = thumbnails.get("maxres", thumbnails.get("standard", thumbnails.get("high", thumbnails.get("default"))))

            # Keep only alphanumerics/underscores so the title is filesystem-safe.
            title_safe = "".join(c if c.isalnum() or c in (' ', '_') else '_' for c in snippet['title']).strip().replace(' ', '_')
            target = os.path.join(tmp_dir, f"{title_safe}_{item['id']}.jpg")

            urllib.request.urlretrieve(best["url"], target)
            thumb_paths.append(target)
    return tmp_dir, thumb_paths
147
-
148
def fetch_channel_thumbnails(channel_url, max_videos, page_mode):
    """Orchestrate channel resolution, video-ID collection, and thumbnail download.

    Returns (status_message, thumbnail_paths, temp_dir, channel_name); the
    last three are None on any failure.
    """
    channel_id = extract_channel_id(channel_url)
    if not channel_id:
        return "❌ Could not extract channel ID", None, None, None

    channel_info = get_channel_info(channel_id)
    if not channel_info:
        # Fixed: this message was missing the ❌ marker used by every other error path.
        return "❌ Could not fetch channel info", None, None, None

    channel_name = channel_info.get("title", "unknown_channel")

    if page_mode in ["videos", "shorts", "all"]:
        try:
            playlist_id = get_uploads_playlist(channel_id)
        except Exception:
            return "❌ Could not find channel 'uploads' playlist ID", None, None, None
        video_ids = get_playlist_video_ids(playlist_id, max_videos=max_videos, mode=page_mode)

    elif page_mode == "live":
        video_ids = get_live_video_ids(channel_id, max_videos=max_videos)
    else:
        return "❌ Unknown mode", None, None, None

    if not video_ids:
        return f"❌ No {page_mode} found", None, None, None

    tmp_dir, thumbs = download_thumbnails(video_ids)
    return f"✅ Fetched {len(thumbs)} {page_mode}", thumbs, tmp_dir, channel_name
177
-
178
def prepare_zip(thumb_dir, channel_name):
    """Archive *thumb_dir* as a ZIP in the system temp dir and return its path.

    The archive name combines a filesystem-safe channel name with today's date.
    """
    safe_name = "".join(c if c.isalnum() or c in (' ', '_') else '_' for c in channel_name).strip().replace(' ', '_')
    stamp = datetime.datetime.now().strftime("%Y%m%d")
    archive_base = os.path.join(tempfile.gettempdir(), f"{safe_name}_Thumbnails_{stamp}")
    shutil.make_archive(archive_base, 'zip', thumb_dir)
    return archive_base + ".zip"
190
-
191
- # -----------------------
192
- # Generator for live status updates
193
- # -----------------------
194
def fetch_and_zip_progress(channel_url, max_videos, page_mode):
    """Generator behind the Start button: streams status updates to the UI.

    Yields (status_text, thumbnail_paths, zip_path, gr.File update) twice —
    once at start, once when finished. Resets the quota counter per run.
    """
    API_USAGE["units"] = 0
    yield f"Starting fetch... | API quota used: {API_USAGE['units']} units", [], None, gr.File(visible=False)  # 💡 Added gr.File update

    status, thumbs, tmp_dir, channel_name = fetch_channel_thumbnails(channel_url, max_videos, page_mode)
    quota_used = API_USAGE["units"]

    # Reword the status so the duration-based filtering is explicit in the UI.
    final_status = status.replace("videos", "long-form videos (>= 60s)") if page_mode == "videos" else status
    final_status = final_status.replace("shorts", "shorts (< 60s)") if page_mode == "shorts" else final_status

    zip_file = None
    if thumbs:
        zip_file = prepare_zip(tmp_dir, channel_name)

    elif tmp_dir and os.path.isdir(tmp_dir):
        # Nothing was downloaded: remove the now-useless temp directory.
        shutil.rmtree(tmp_dir)

    # 💡 IMPORTANT: Now yielding a list of file paths (thumbs) and the zip file path.
    # The 'thumbs' list goes to gr.Files.
    yield f"{final_status} | API quota used: {quota_used} units", thumbs, zip_file, gr.File(visible=True)  # 💡 Set visible=True on success
214
-
215
- # -----------------------
216
- # Gradio Interface (Modified)
217
- # -----------------------
218
# Top-level Gradio Blocks UI: URL + mode + count inputs, status text,
# per-thumbnail file list, and a ZIP download slot.
with gr.Blocks() as demo:
    gr.Markdown("## 🎬 YouTube Channel Thumbnails Downloader (Files Preview)")
    gr.Markdown("Thumbnails are now listed as individual files. Click the filename to preview/download.")

    url_input = gr.Textbox(label="YouTube Channel URL", placeholder="https://www.youtube.com/@roisinmurphyofficial")
    page_selector = gr.Dropdown(
        choices=["videos", "shorts", "live", "all"],
        value="videos",
        label="Page to Collect"
    )
    max_videos_slider = gr.Slider(minimum=1, maximum=100, step=1, value=20, label="Max Items to Fetch")
    start_btn = gr.Button("🚀 Start Collect")

    status_output = gr.Textbox(label="Status")

    # 💡 REPLACED gr.Gallery with gr.Files
    thumbs_list = gr.Files(
        label="Thumbnails Preview and Download (Click name for preview)",
        file_count="multiple",  # Allows multiple files
        type="filepath",  # Returns the path, which is what we need
        visible=True  # Ensure it's visible initially
    )

    download_btn = gr.File(label="Download All Thumbnails (ZIP)")

    start_btn.click(
        fetch_and_zip_progress,
        inputs=[url_input, max_videos_slider, page_selector],
        # 💡 Updated output targets to match the new return values
        # NOTE(review): download_btn appears twice, so the 4th yielded value
        # (a gr.File visibility update) lands on the same component as the
        # 3rd (the zip path) — presumably the zip path is overwritten; verify
        # in the running UI and consider a dedicated component if so.
        outputs=[status_output, thumbs_list, download_btn, download_btn]
        # Note: Added download_btn twice as the generator yields 4 items,
        # but the last one is a gr.File update to hide/show the component.
        # This is a slightly awkward necessity of Gradio's generator API.
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
 
253
# Launch the Gradio app when run as a script (default host/port).
if __name__ == "__main__":
    demo.launch()
 
 
 
1
  import os
2
+ import zipfile
3
  import tempfile
4
+ import requests
5
+ import numpy as np
6
+ import pandas as pd
7
+ from PIL import Image
8
+ import torch
9
+ import torch.nn.functional as F
10
+ from torchvision import transforms
11
+ from torchvision.models import resnet50, ResNet50_Weights
12
+ from sklearn.cluster import MiniBatchKMeans
13
+ import matplotlib.pyplot as plt
14
+ import io
15
+ from datetime import datetime
16
+
17
+ import gradio as gr
18
+
19
+ # Face analysis
20
+ from deepface import DeepFace
21
+ import cv2
22
+
23
+ # ---------------------------
24
+ # Force CPU if no CUDA
25
+ # ---------------------------
26
+ if not torch.cuda.is_available():
27
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
28
+
29
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
30
+
31
+ # ---------------------------
32
+ # Load ResNet50
33
+ # ---------------------------
34
+ weights = ResNet50_Weights.DEFAULT
35
+ model = resnet50(weights=weights).to(device)
36
+ model.eval()
37
+
38
+ # ---------------------------
39
+ # Transformations
40
+ # ---------------------------
41
+ transform = transforms.Compose([
42
+ transforms.Resize(256),
43
+ transforms.CenterCrop(224),
44
+ transforms.ToTensor(),
45
+ transforms.Normalize(mean=[0.485, 0.456, 0.406],
46
+ std=[0.229, 0.224, 0.225]),
47
+ ])
48
+
49
+ # ---------------------------
50
+ # ImageNet labels
51
+ # ---------------------------
52
+ LABELS_URL = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
53
+ imagenet_classes = [line.strip() for line in requests.get(LABELS_URL).text.splitlines()]
54
+
55
+ # ---------------------------
56
+ # Color utilities
57
+ # ---------------------------
58
+ BASIC_COLORS = {
59
+ "Red": (255, 0, 0),
60
+ "Green": (0, 255, 0),
61
+ "Blue": (0, 0, 255),
62
+ "Yellow": (255, 255, 0),
63
+ "Cyan": (0, 255, 255),
64
+ "Magenta": (255, 0, 255),
65
+ "Black": (0, 0, 0),
66
+ "White": (255, 255, 255),
67
+ "Gray": (128, 128, 128),
68
+ }
69
+
70
def closest_basic_color(rgb, palette=None):
    """Return the name of the palette color nearest to *rgb*.

    Uses squared Euclidean distance in RGB space (no sqrt needed for
    comparison). Ties keep the first palette entry, matching the original
    strict-less-than scan.

    Args:
        rgb: (r, g, b) tuple of 0-255 values.
        palette: optional {name: (r, g, b)} mapping; defaults to the
            module-level BASIC_COLORS (backward compatible).
    """
    if palette is None:
        palette = BASIC_COLORS
    r, g, b = rgb
    return min(
        palette,
        key=lambda name: (r - palette[name][0]) ** 2
                         + (g - palette[name][1]) ** 2
                         + (b - palette[name][2]) ** 2,
    )
80
+
81
def get_dominant_color(image, num_colors=5):
    """Estimate the dominant color of a PIL image via MiniBatchKMeans.

    The image is downsampled to 100x100 for speed, pixels are clustered into
    *num_colors* groups, and the center of the most populated cluster is taken
    as the dominant color.

    Returns:
        ((r, g, b) int tuple, "#rrggbb" hex string).
    """
    # Force 3 channels first: RGBA/LA/P/L inputs would make reshape(-1, 3)
    # fail or silently mis-group pixels.
    image = image.convert("RGB").resize((100, 100))
    pixels = np.array(image).reshape(-1, 3)
    kmeans = MiniBatchKMeans(n_clusters=num_colors, random_state=0, n_init=5)
    kmeans.fit(pixels)
    # Dominant = center of the cluster owning the most pixels.
    dominant_color = kmeans.cluster_centers_[np.argmax(np.bincount(kmeans.labels_))]
    dominant_color = tuple(dominant_color.astype(int))
    hex_color = f"#{dominant_color[0]:02x}{dominant_color[1]:02x}{dominant_color[2]:02x}"
    return dominant_color, hex_color
90
+
91
+ # ---------------------------
92
+ # Core function
93
+ # ---------------------------
94
def classify_zip_and_analyze_color(zip_file):
    """Process a ZIP of images: ImageNet top-3, dominant color, face analysis.

    For each image directly inside the archive, runs ResNet50 classification,
    dominant-color extraction, and DeepFace age/gender/emotion analysis, then
    builds summary plots.

    Returns:
        (results DataFrame, path to XLSX export, and four PIL plot images:
        color frequency, prediction distribution, gender distribution,
        age histogram by gender).
    """
    results = []

    # Output file naming: <zip basename>_<YYYYMMDD>_results.xlsx
    zip_name = os.path.splitext(os.path.basename(zip_file.name))[0]
    date_str = datetime.now().strftime("%Y%m%d")

    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
            zip_ref.extractall(tmpdir)

        # NOTE(review): only the top level of the extracted archive is scanned;
        # images inside sub-folders of the ZIP are skipped — confirm intended.
        for fname in sorted(os.listdir(tmpdir)):
            if fname.lower().endswith(('.png', '.jpg', '.jpeg')):
                img_path = os.path.join(tmpdir, fname)
                try:
                    image = Image.open(img_path).convert("RGB")
                except Exception:
                    # Unreadable/corrupt file: skip silently.
                    continue

                # Classification: ResNet50 forward pass, softmax, top-3 labels.
                input_tensor = transform(image).unsqueeze(0).to(device)
                with torch.no_grad():
                    output = model(input_tensor)
                    probs = F.softmax(output, dim=1)[0]

                top3_prob, top3_idx = torch.topk(probs, 3)
                preds = [(imagenet_classes[idx], f"{prob.item()*100:.2f}%") for idx, prob in zip(top3_idx, top3_prob)]

                # Dominant color (k-means) mapped to the nearest basic color name.
                rgb, hex_color = get_dominant_color(image)
                basic_color = closest_basic_color(rgb)

                # Face detection & characterization (best-effort: any DeepFace
                # failure yields an empty face list rather than aborting).
                faces_data = []
                try:
                    # DeepFace/cv2 expect BGR channel order.
                    img_cv2 = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
                    detected_faces = DeepFace.analyze(
                        img_cv2, actions=["age", "gender", "emotion"], enforce_detection=False
                    )
                    # DeepFace may return a list (multiple faces) or one dict.
                    if isinstance(detected_faces, list):
                        for f in detected_faces:
                            faces_data.append({
                                "age": f["age"],
                                "gender": f["gender"],
                                "emotion": f["dominant_emotion"]
                            })
                    else:
                        faces_data.append({
                            "age": detected_faces["age"],
                            "gender": detected_faces["gender"],
                            "emotion": detected_faces["dominant_emotion"]
                        })
                except Exception:
                    faces_data = []

                # Thumbnail preview (64x64 max, aspect ratio preserved).
                thumbnail = image.copy()
                thumbnail.thumbnail((64, 64))

                results.append((
                    fname,
                    ", ".join([p[0] for p in preds]),
                    ", ".join([p[1] for p in preds]),
                    hex_color,
                    basic_color,
                    faces_data,
                    thumbnail
                ))

    # Build dataframe
    df = pd.DataFrame(results, columns=[
        "Filename", "Top 3 Predictions", "Confidence",
        "Dominant Color", "Basic Color", "Face Info", "Thumbnail"
    ])

    # Save XLSX with zip name + date
    # NOTE(review): "Face Info" holds lists of dicts and "Thumbnail" holds PIL
    # images — to_excel will serialize their str() forms; confirm acceptable.
    out_xlsx = os.path.join(tempfile.gettempdir(), f"{zip_name}_{date_str}_results.xlsx")
    df.to_excel(out_xlsx, index=False)

    # ---------------------------
    # Plot 1: Basic color frequency
    # ---------------------------
    fig1, ax1 = plt.subplots()
    color_counts = df["Basic Color"].value_counts()
    ax1.bar(color_counts.index, color_counts.values, color="skyblue")
    ax1.set_title("Basic Color Frequency")
    ax1.set_ylabel("Count")
    buf1 = io.BytesIO()
    plt.savefig(buf1, format="png")
    plt.close(fig1)
    buf1.seek(0)
    plot1_img = Image.open(buf1)

    # ---------------------------
    # Plot 2: Top prediction distribution (20 most frequent labels)
    # ---------------------------
    fig2, ax2 = plt.subplots()
    preds_flat = []
    for p in df["Top 3 Predictions"]:
        preds_flat.extend(p.split(", "))
    pred_counts = pd.Series(preds_flat).value_counts().head(20)
    # Reverse so the most frequent label appears at the top of the barh chart.
    ax2.barh(pred_counts.index[::-1], pred_counts.values[::-1], color="salmon")
    ax2.set_title("Top Prediction Distribution")
    ax2.set_xlabel("Count")
    buf2 = io.BytesIO()
    plt.savefig(buf2, format="png", bbox_inches="tight")
    plt.close(fig2)
    buf2.seek(0)
    plot2_img = Image.open(buf2)

    # ---------------------------
    # Extract ages and genders
    # ---------------------------
    ages_male, ages_female = [], []
    gender_confidence = {"Homme": 0, "Femme": 0}

    for face_list in df["Face Info"]:
        for face in face_list:
            age = face["age"]
            # NOTE(review): assumes DeepFace's "gender" field is a
            # {label: confidence-percentage} dict — confirm for the pinned
            # deepface version.
            gender_dict = face["gender"]
            gender = max(gender_dict, key=gender_dict.get)
            conf = float(gender_dict[gender]) / 100
            # Cap any single face's vote at 0.9 (the "Weighted ≤90%" in plot 3).
            weight = min(conf, 0.9)
            gender_trans = "Homme" if gender == "Man" else "Femme"
            gender_confidence[gender_trans] += weight
            if gender_trans == "Homme":
                ages_male.append(age)
            else:
                ages_female.append(age)

    # ---------------------------
    # Plot 3: Gender distribution (sum of capped confidences)
    # ---------------------------
    fig3, ax3 = plt.subplots()
    ax3.bar(gender_confidence.keys(), gender_confidence.values(), color=["lightblue", "pink"])
    ax3.set_title("Gender Distribution (Weighted ≤90%)")
    ax3.set_ylabel("Sum of Confidence")
    buf3 = io.BytesIO()
    plt.savefig(buf3, format="png")
    plt.close(fig3)
    buf3.seek(0)
    plot3_img = Image.open(buf3)

    # ---------------------------
    # Plot 4: Age distribution by gender (5-year bins, 0-100)
    # ---------------------------
    fig4, ax4 = plt.subplots()
    bins = range(0, 101, 5)
    ax4.hist([ages_male, ages_female], bins=bins, color=["lightblue", "pink"], label=["Homme", "Femme"], edgecolor="black")
    ax4.set_title("Age Distribution by Gender")
    ax4.set_xlabel("Age")
    ax4.set_ylabel("Count")
    ax4.legend()
    buf4 = io.BytesIO()
    plt.savefig(buf4, format="png")
    plt.close(fig4)
    buf4.seek(0)
    plot4_img = Image.open(buf4)

    return df, out_xlsx, plot1_img, plot2_img, plot3_img, plot4_img
253
+
254
+ # ---------------------------
255
+ # Gradio Interface
256
+ # ---------------------------
257
# Gradio UI: single-function interface around classify_zip_and_analyze_color.
# Outputs map 1:1 to the function's 6-tuple return value.
demo = gr.Interface(
    fn=classify_zip_and_analyze_color,
    inputs=gr.File(file_types=[".zip"], label="Upload ZIP of images"),
    outputs=[
        gr.Dataframe(
            headers=["Filename", "Top 3 Predictions", "Confidence",
                     "Dominant Color", "Basic Color", "Face Info", "Thumbnail"],
            datatype=["str","str","str","str","str","str","pil"]
        ),
        gr.File(label="Download XLSX"),
        gr.Image(type="pil", label="Basic Color Frequency"),
        gr.Image(type="pil", label="Top Prediction Distribution"),
        gr.Image(type="pil", label="Gender Distribution (Weighted ≤90%)"),
        gr.Image(type="pil", label="Age Distribution by Gender"),
    ],
    title="Image Classifier with Color & Face Analysis",
    description="Upload a ZIP of images. Classifies images, analyzes dominant color, detects/characterizes faces (age, gender, emotion), and shows thumbnails.",
)

if __name__ == "__main__":
    # Bind to all interfaces on port 7860 (conventional for container/Spaces hosting).
    demo.launch(server_name="0.0.0.0", server_port=7860)