clementBE committed on
Commit
25bf82c
Β·
verified Β·
1 Parent(s): c0ff534

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +250 -273
app.py CHANGED
@@ -1,277 +1,254 @@
 
 
1
  import os
2
- import zipfile
3
  import tempfile
4
- import requests
5
- import numpy as np
6
- import pandas as pd
7
- from PIL import Image
8
- import torch
9
- import torch.nn.functional as F
10
- from torchvision import transforms
11
- from torchvision.models import resnet50, ResNet50_Weights
12
- from sklearn.cluster import MiniBatchKMeans
13
- import matplotlib.pyplot as plt
14
- import io
15
- from datetime import datetime
16
-
17
- import gradio as gr
18
-
19
- # Face analysis
20
- from deepface import DeepFace
21
- import cv2
22
-
23
- # ---------------------------
24
- # Force CPU if no CUDA
25
- # ---------------------------
26
- if not torch.cuda.is_available():
27
- os.environ["CUDA_VISIBLE_DEVICES"] = ""
28
-
29
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
30
-
31
- # ---------------------------
32
- # Load ResNet50
33
- # ---------------------------
34
- weights = ResNet50_Weights.DEFAULT
35
- model = resnet50(weights=weights).to(device)
36
- model.eval()
37
-
38
- # ---------------------------
39
- # Transformations
40
- # ---------------------------
41
- transform = transforms.Compose([
42
- transforms.Resize(256),
43
- transforms.CenterCrop(224),
44
- transforms.ToTensor(),
45
- transforms.Normalize(mean=[0.485, 0.456, 0.406],
46
- std=[0.229, 0.224, 0.225]),
47
- ])
48
-
49
- # ---------------------------
50
- # ImageNet labels
51
- # ---------------------------
52
- LABELS_URL = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
53
- imagenet_classes = [line.strip() for line in requests.get(LABELS_URL).text.splitlines()]
54
-
# ---------------------------
# Color utilities
# ---------------------------
BASIC_COLORS = {
    "Red": (255, 0, 0),
    "Green": (0, 255, 0),
    "Blue": (0, 0, 255),
    "Yellow": (255, 255, 0),
    "Cyan": (0, 255, 255),
    "Magenta": (255, 0, 255),
    "Black": (0, 0, 0),
    "White": (255, 255, 255),
    "Gray": (128, 128, 128),
}

def closest_basic_color(rgb):
    """Return the BASIC_COLORS name nearest to *rgb* by squared RGB distance."""
    r, g, b = rgb

    def _sq_dist(item):
        # Squared Euclidean distance; no sqrt needed for ranking.
        _, (cr, cg, cb) = item
        return (r - cr) ** 2 + (g - cg) ** 2 + (b - cb) ** 2

    # min() is stable, so ties resolve to the first dict entry — same as the
    # original strict "<" comparison loop.
    name, _ = min(BASIC_COLORS.items(), key=_sq_dist)
    return name
def get_dominant_color(image, num_colors=5):
    """Return the image's dominant colour as ((r, g, b), '#rrggbb').

    The image is shrunk to 100x100, the pixels are clustered with
    MiniBatchKMeans, and the centre of the most populated cluster wins.
    """
    small = image.resize((100, 100))
    flat_pixels = np.array(small).reshape(-1, 3)
    clusterer = MiniBatchKMeans(n_clusters=num_colors, random_state=0, n_init=5)
    clusterer.fit(flat_pixels)
    # Most populated cluster = dominant colour.
    biggest_cluster = np.argmax(np.bincount(clusterer.labels_))
    center = clusterer.cluster_centers_[biggest_cluster].astype(int)
    rgb = tuple(center)
    hex_code = "#" + "".join(f"{channel:02x}" for channel in rgb)
    return rgb, hex_code
# ---------------------------
# Core function
# ---------------------------
def classify_zip_and_analyze_color(zip_file):
    """Process every image inside an uploaded ZIP.

    For each .png/.jpg/.jpeg file: top-3 ImageNet predictions (ResNet50),
    dominant colour, DeepFace age/gender/emotion, and a 64x64 thumbnail.
    Returns (dataframe, xlsx path, plot1, plot2, plot3, plot4) where the
    plots are PIL images summarising colours, labels, gender and ages.
    """
    results = []

    # Output workbook is named after the uploaded ZIP plus today's date.
    zip_name = os.path.splitext(os.path.basename(zip_file.name))[0]
    date_str = datetime.now().strftime("%Y%m%d")

    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
            zip_ref.extractall(tmpdir)

        # NOTE(review): non-recursive — images in sub-folders of the ZIP are skipped.
        for fname in sorted(os.listdir(tmpdir)):
            if fname.lower().endswith(('.png', '.jpg', '.jpeg')):
                img_path = os.path.join(tmpdir, fname)
                try:
                    image = Image.open(img_path).convert("RGB")
                except Exception:
                    continue  # unreadable / corrupt file: skip silently

                # Classification — top-3 ImageNet classes.
                input_tensor = transform(image).unsqueeze(0).to(device)
                with torch.no_grad():
                    output = model(input_tensor)
                    probs = F.softmax(output, dim=1)[0]

                top3_prob, top3_idx = torch.topk(probs, 3)
                preds = [(imagenet_classes[idx], f"{prob.item()*100:.2f}%") for idx, prob in zip(top3_idx, top3_prob)]

                # Dominant color
                rgb, hex_color = get_dominant_color(image)
                basic_color = closest_basic_color(rgb)

                # Face detection & characterization — best effort; any failure
                # leaves faces_data empty rather than aborting the batch.
                faces_data = []
                try:
                    img_cv2 = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
                    detected_faces = DeepFace.analyze(
                        img_cv2, actions=["age", "gender", "emotion"], enforce_detection=False
                    )
                    # DeepFace returns a list for multi-face images, a dict otherwise.
                    if isinstance(detected_faces, list):
                        for f in detected_faces:
                            faces_data.append({
                                "age": f["age"],
                                "gender": f["gender"],  # dict of per-gender confidences
                                "emotion": f["dominant_emotion"]
                            })
                    else:
                        faces_data.append({
                            "age": detected_faces["age"],
                            "gender": detected_faces["gender"],
                            "emotion": detected_faces["dominant_emotion"]
                        })
                except Exception:
                    faces_data = []

                # Thumbnail preview (in-place resize preserving aspect ratio).
                thumbnail = image.copy()
                thumbnail.thumbnail((64, 64))

                results.append((
                    fname,
                    ", ".join([p[0] for p in preds]),
                    ", ".join([p[1] for p in preds]),
                    hex_color,
                    basic_color,
                    faces_data,
                    thumbnail
                ))

    # Build dataframe
    df = pd.DataFrame(results, columns=[
        "Filename", "Top 3 Predictions", "Confidence",
        "Dominant Color", "Basic Color", "Face Info", "Thumbnail"
    ])

    # Save XLSX with zip name + date
    out_xlsx = os.path.join(tempfile.gettempdir(), f"{zip_name}_{date_str}_results.xlsx")
    df.to_excel(out_xlsx, index=False)

    # ---------------------------
    # Plot 1: Basic color frequency
    # ---------------------------
    fig1, ax1 = plt.subplots()
    color_counts = df["Basic Color"].value_counts()
    ax1.bar(color_counts.index, color_counts.values, color="skyblue")
    ax1.set_title("Basic Color Frequency")
    ax1.set_ylabel("Count")
    buf1 = io.BytesIO()
    plt.savefig(buf1, format="png")
    plt.close(fig1)
    buf1.seek(0)
    plot1_img = Image.open(buf1)

    # ---------------------------
    # Plot 2: Top prediction distribution (20 most frequent labels)
    # NOTE(review): splitting on ", " is lossy — many ImageNet label strings
    # themselves contain commas, so some labels get fragmented here.
    # ---------------------------
    fig2, ax2 = plt.subplots()
    preds_flat = []
    for p in df["Top 3 Predictions"]:
        preds_flat.extend(p.split(", "))
    pred_counts = pd.Series(preds_flat).value_counts().head(20)
    ax2.barh(pred_counts.index[::-1], pred_counts.values[::-1], color="salmon")
    ax2.set_title("Top Prediction Distribution")
    ax2.set_xlabel("Count")
    buf2 = io.BytesIO()
    plt.savefig(buf2, format="png", bbox_inches="tight")
    plt.close(fig2)
    buf2.seek(0)
    plot2_img = Image.open(buf2)

    # ---------------------------
    # Extract ages and genders. Each face's vote is weighted by its gender
    # confidence, capped at 0.9 so a single very confident face cannot dominate.
    # ---------------------------
    ages_male, ages_female = [], []
    gender_confidence = {"Homme": 0, "Femme": 0}

    for face_list in df["Face Info"]:
        for face in face_list:
            age = face["age"]
            gender_dict = face["gender"]
            gender = max(gender_dict, key=gender_dict.get)
            conf = float(gender_dict[gender]) / 100
            weight = min(conf, 0.9)
            gender_trans = "Homme" if gender == "Man" else "Femme"
            gender_confidence[gender_trans] += weight
            if gender_trans == "Homme":
                ages_male.append(age)
            else:
                ages_female.append(age)

    # ---------------------------
    # Plot 3: Gender distribution (sum of capped confidences)
    # ---------------------------
    fig3, ax3 = plt.subplots()
    ax3.bar(gender_confidence.keys(), gender_confidence.values(), color=["lightblue", "pink"])
    ax3.set_title("Gender Distribution (Weighted ≀90%)")
    ax3.set_ylabel("Sum of Confidence")
    buf3 = io.BytesIO()
    plt.savefig(buf3, format="png")
    plt.close(fig3)
    buf3.seek(0)
    plot3_img = Image.open(buf3)

    # ---------------------------
    # Plot 4: Age distribution by gender (5-year bins, 0-100)
    # ---------------------------
    fig4, ax4 = plt.subplots()
    bins = range(0, 101, 5)
    ax4.hist([ages_male, ages_female], bins=bins, color=["lightblue", "pink"], label=["Homme", "Femme"], edgecolor="black")
    ax4.set_title("Age Distribution by Gender")
    ax4.set_xlabel("Age")
    ax4.set_ylabel("Count")
    ax4.legend()
    buf4 = io.BytesIO()
    plt.savefig(buf4, format="png")
    plt.close(fig4)
    buf4.seek(0)
    plot4_img = Image.open(buf4)

    return df, out_xlsx, plot1_img, plot2_img, plot3_img, plot4_img
# ---------------------------
# Gradio Interface
# ---------------------------
# Single-function UI: upload a ZIP, get back a table, an XLSX and four plots.
# Output order must match classify_zip_and_analyze_color's return tuple.
demo = gr.Interface(
    fn=classify_zip_and_analyze_color,
    inputs=gr.File(file_types=[".zip"], label="Upload ZIP of images"),
    outputs=[
        gr.Dataframe(
            headers=["Filename", "Top 3 Predictions", "Confidence",
                     "Dominant Color", "Basic Color", "Face Info", "Thumbnail"],
            datatype=["str","str","str","str","str","str","pil"]
        ),
        gr.File(label="Download XLSX"),
        gr.Image(type="pil", label="Basic Color Frequency"),
        gr.Image(type="pil", label="Top Prediction Distribution"),
        gr.Image(type="pil", label="Gender Distribution (Weighted ≀90%)"),
        gr.Image(type="pil", label="Age Distribution by Gender"),
    ],
    title="Image Classifier with Color & Face Analysis",
    description="Upload a ZIP of images. Classifies images, analyzes dominant color, detects/characterizes faces (age, gender, emotion), and shows thumbnails.",
)
 
276
  if __name__ == "__main__":
277
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
import datetime
import os
import re
import shutil
import tempfile
import urllib.parse
import urllib.request

import gradio as gr
import isodate
import requests
9
+
10
+ # --- IMPORTANT: Ensure this environment variable is set ---
11
+ API_KEY = os.getenv("YOUTUBE_API_KEY")
12
+ BASE_URL = "https://www.googleapis.com/youtube/v3"
13
+
14
+ # -----------------------
15
+ # API Usage Tracker
16
+ # -----------------------
17
+ API_USAGE = {"units": 0}
18
+
def api_get(url, cost, **kwargs):
    """GET *url*, adding *cost* quota units to the global API_USAGE tally."""
    API_USAGE["units"] = API_USAGE["units"] + cost
    response = requests.get(url, **kwargs)
    return response
# -----------------------
# Helper Functions (Simplified)
# -----------------------
# Matches the ISO-8601 durations the YouTube API emits (e.g. "PT1M30S",
# "P1DT2H"). Year/month designators are deliberately unsupported — like the
# previous isodate-based code, such inputs yield 0.
_ISO8601_DURATION = re.compile(
    r"P(?:(\d+)W)?(?:(\d+)D)?"
    r"(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+(?:\.\d+)?)S)?)?"
)

def parse_duration(duration_str):
    """Convert an ISO-8601 duration string to whole seconds.

    Returns 0 for anything unparsable (None, empty string, malformed input),
    matching the previous best-effort behaviour. Uses only the stdlib instead
    of the third-party `isodate` package.
    """
    try:
        match = _ISO8601_DURATION.fullmatch(duration_str)
    except TypeError:
        # duration_str is not a string (e.g. None).
        return 0
    if not match:
        return 0
    weeks, days, hours, minutes, seconds = (
        float(g) if g else 0.0 for g in match.groups()
    )
    return int(weeks * 604800 + days * 86400 + hours * 3600 + minutes * 60 + seconds)
def get_channel_info(channel_id):
    """Fetch the channel snippet (including title) (Cost: 1).

    Returns the snippet dict, or None on HTTP error / unknown channel.
    """
    resp = api_get(f"{BASE_URL}/channels?part=snippet&id={channel_id}&key={API_KEY}", 1)
    if resp.status_code == 200:
        # Parse the body once instead of re-parsing per check.
        items = resp.json().get("items")
        if items:
            return items[0]["snippet"]
    return None
def extract_channel_id(url: str):
    """Resolve a YouTube channel URL (/channel/, /@handle, /user/) to a channel ID.

    Returns None when the URL form is unknown or the API lookup fails.
    Handle and username path segments are URL-encoded before being placed in
    the query string, so non-ASCII handles no longer corrupt the request.
    """
    if "channel/" in url:
        # Canonical URL already embeds the channel ID.
        return url.split("channel/")[1].split("/")[0]
    if "/@" in url:
        handle = url.split("/@")[1].split("/")[0]
        # channels.list with forHandle resolves a handle exactly for 1 quota
        # unit, unlike the previous search.list call which cost 100 units and
        # could return a similarly-named but wrong channel.
        r = api_get(
            f"{BASE_URL}/channels?part=id&forHandle={urllib.parse.quote(handle)}&key={API_KEY}", 1
        )
        if r.status_code != 200:
            return None
        data = r.json()
        if data.get("items"):
            return data["items"][0]["id"]
    elif "user/" in url:
        username = url.split("user/")[1].split("/")[0]
        r = api_get(
            f"{BASE_URL}/channels?part=id&forUsername={urllib.parse.quote(username)}&key={API_KEY}", 1
        )
        if r.status_code != 200:
            return None
        data = r.json()
        if data.get("items"):
            return data["items"][0]["id"]
    return None
def get_uploads_playlist(channel_id):
    """Return the channel's 'uploads' playlist ID (Cost: 1).

    Raises (KeyError/IndexError) for an unknown channel; callers wrap this
    in try/except.
    """
    data = api_get(f"{BASE_URL}/channels?part=contentDetails&id={channel_id}&key={API_KEY}", 1).json()
    channel = data['items'][0]
    return channel['contentDetails']['relatedPlaylists']['uploads']
# -----------------------
# Fetch and Filter Video IDs
# -----------------------
def filter_video_ids(video_ids, mode="videos"):
    """Filter video IDs by duration (Cost: 1 unit per 50 videos).

    mode="videos" keeps >= 60s, "shorts" keeps < 60s, "all" keeps everything;
    an unrecognised mode keeps nothing.
    """
    keep = []
    for start in range(0, len(video_ids), 50):
        chunk = video_ids[start:start + 50]
        data = api_get(f"{BASE_URL}/videos?part=contentDetails&id={','.join(chunk)}&key={API_KEY}", 1).json()

        for item in data.get("items", []):
            if 'contentDetails' not in item:
                continue
            seconds = parse_duration(item["contentDetails"]["duration"])
            wanted = (
                mode == "all"
                or (mode == "videos" and seconds >= 60)
                or (mode == "shorts" and seconds < 60)
            )
            if wanted:
                keep.append(item["id"])

    return keep
def get_playlist_video_ids(playlist_id, max_videos=50, mode="videos"):
    """Page through a playlist, filtering by duration, until max_videos IDs collected."""
    collected = []
    page_token = None

    while len(collected) < max_videos:
        # Always request a full API page (50); filtering happens client-side.
        url = (f"{BASE_URL}/playlistItems?part=snippet&playlistId={playlist_id}"
               f"&maxResults=50&key={API_KEY}")
        if page_token:
            url += f"&pageToken={page_token}"

        data = api_get(url, 1).json()
        page_ids = [item["snippet"]["resourceId"]["videoId"] for item in data.get("items", [])]
        matching = filter_video_ids(page_ids, mode=mode)

        # Take only as many as still fit.
        collected.extend(matching[:max_videos - len(collected)])

        page_token = data.get("nextPageToken")
        if not page_token or not page_ids:
            break

    return collected[:max_videos]
def get_live_video_ids(channel_id, max_videos=50):
    """Fetch IDs of completed live streams via search.list (Cost: 100).

    search.list caps maxResults at 50; the UI slider allows up to 100, and
    passing a value above 50 makes the API reject the request outright, so
    the requested count is clamped.
    """
    capped = min(max_videos, 50)
    url = (f"{BASE_URL}/search?part=id&channelId={channel_id}"
           f"&eventType=completed&type=video&maxResults={capped}&key={API_KEY}")
    r = api_get(url, 100).json()
    video_ids = []
    for item in r.get("items", []):
        video_ids.append(item["id"]["videoId"])
    return video_ids
# -----------------------
# Thumbnails Download and Prep
# -----------------------
def download_thumbnails(video_ids):
    """Download best-available thumbnails to a temp directory (Cost: 1 unit per 50 videos).

    Returns (temp dir path, list of saved file paths). Filenames embed a
    sanitised video title plus the video ID for uniqueness.
    """
    out_dir = tempfile.mkdtemp()
    saved = []
    for start in range(0, len(video_ids), 50):
        chunk = video_ids[start:start + 50]
        data = api_get(f"{BASE_URL}/videos?part=snippet&id={','.join(chunk)}&key={API_KEY}", 1).json()
        for item in data.get("items", []):
            if 'snippet' not in item:
                continue
            snippet = item['snippet']
            thumbs = snippet['thumbnails']
            # Prefer the largest variant available.
            best = thumbs.get("maxres", thumbs.get("standard", thumbs.get("high", thumbs.get("default"))))

            # Use the video title for the filename for better context in gr.Files
            safe_title = "".join(
                ch if ch.isalnum() or ch in (' ', '_') else '_' for ch in snippet['title']
            ).strip().replace(' ', '_')
            path = os.path.join(out_dir, f"{safe_title}_{item['id']}.jpg")

            urllib.request.urlretrieve(best["url"], path)
            saved.append(path)
    return out_dir, saved
def fetch_channel_thumbnails(channel_url, max_videos, page_mode):
    """Resolve the channel, collect matching video IDs, download thumbnails.

    Returns (status message, thumbnail paths, temp dir, channel title);
    the last three are None on any failure.
    """
    channel_id = extract_channel_id(channel_url)
    if not channel_id:
        return "❌ Could not extract channel ID", None, None, None

    channel_info = get_channel_info(channel_id)
    if not channel_info:
        return "❌ Could not fetch channel info", None, None, None

    channel_name = channel_info.get("title", "unknown_channel")

    # Pick the ID source for the requested page type.
    if page_mode == "live":
        video_ids = get_live_video_ids(channel_id, max_videos=max_videos)
    elif page_mode in ("videos", "shorts", "all"):
        try:
            uploads = get_uploads_playlist(channel_id)
        except Exception:
            return "❌ Could not find channel 'uploads' playlist ID", None, None, None
        video_ids = get_playlist_video_ids(uploads, max_videos=max_videos, mode=page_mode)
    else:
        return "❌ Unknown mode", None, None, None

    if not video_ids:
        return f"❌ No {page_mode} found", None, None, None

    tmp_dir, thumbs = download_thumbnails(video_ids)
    return f"βœ… Fetched {len(thumbs)} {page_mode}", thumbs, tmp_dir, channel_name
def prepare_zip(thumb_dir, channel_name):
    """Zip *thumb_dir* into <tmp>/<Channel>_Thumbnails_<YYYYMMDD>.zip; return the path."""
    # Sanitise the channel name the same way thumbnail filenames are sanitised:
    # keep alphanumerics, spaces and underscores; everything else becomes '_'.
    cleaned = [ch if ch.isalnum() or ch in (' ', '_') else '_' for ch in channel_name]
    safe_name = "".join(cleaned).strip().replace(' ', '_')

    stamp = datetime.datetime.now().strftime("%Y%m%d")
    archive_base = os.path.join(tempfile.gettempdir(), f"{safe_name}_Thumbnails_{stamp}")

    # make_archive appends the ".zip" extension itself.
    shutil.make_archive(archive_base, 'zip', thumb_dir)
    return archive_base + ".zip"
# -----------------------
# Generator for live status updates
# -----------------------
def fetch_and_zip_progress(channel_url, max_videos, page_mode):
    """Generator driving the UI.

    Yields (status text, thumbnail path list, zip path, gr.File update) —
    once at start (hiding the download component) and once with the results.
    """
    API_USAGE["units"] = 0
    yield f"Starting fetch... | API quota used: {API_USAGE['units']} units", [], None, gr.File(visible=False)

    status, thumbs, tmp_dir, channel_name = fetch_channel_thumbnails(channel_url, max_videos, page_mode)
    quota_used = API_USAGE["units"]

    # Annotate the status message with the duration filter that was applied.
    if page_mode == "videos":
        final_status = status.replace("videos", "long-form videos (>= 60s)")
    elif page_mode == "shorts":
        final_status = status.replace("shorts", "shorts (< 60s)")
    else:
        final_status = status

    zip_file = None
    if thumbs:
        zip_file = prepare_zip(tmp_dir, channel_name)
    elif tmp_dir and os.path.isdir(tmp_dir):
        # Nothing downloaded — clean up the empty temp directory.
        shutil.rmtree(tmp_dir)

    # The 'thumbs' list feeds gr.Files; the gr.File update re-shows the
    # download component on completion.
    yield f"{final_status} | API quota used: {quota_used} units", thumbs, zip_file, gr.File(visible=True)
# -----------------------
# Gradio Interface (Modified)
# -----------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🎬 YouTube Channel Thumbnails Downloader (Files Preview)")
    gr.Markdown("Thumbnails are now listed as individual files. Click the filename to preview/download.")

    url_input = gr.Textbox(label="YouTube Channel URL", placeholder="https://www.youtube.com/@roisinmurphyofficial")
    page_selector = gr.Dropdown(
        choices=["videos", "shorts", "live", "all"],
        value="videos",
        label="Page to Collect"
    )
    max_videos_slider = gr.Slider(minimum=1, maximum=100, step=1, value=20, label="Max Items to Fetch")
    start_btn = gr.Button("πŸš€ Start Collect")

    status_output = gr.Textbox(label="Status")

    # gr.Files (replacing a Gallery) so each thumbnail is an individually
    # previewable/downloadable file entry.
    thumbs_list = gr.Files(
        label="Thumbnails Preview and Download (Click name for preview)",
        file_count="multiple",  # Allows multiple files
        type="filepath",        # Returns the path, which is what we need
        visible=True            # Ensure it's visible initially
    )

    download_btn = gr.File(label="Download All Thumbnails (ZIP)")

    start_btn.click(
        fetch_and_zip_progress,
        inputs=[url_input, max_videos_slider, page_selector],
        # The generator yields 4 values, so download_btn is listed twice: the
        # 3rd value supplies its file path and the 4th applies a visibility
        # update to the same component.
        # NOTE(review): the trailing gr.File(visible=...) update carries no
        # value and may override the zip path assigned by the 3rd output —
        # confirm in the UI; folding both into one gr.File(value=..., visible=True)
        # yield would avoid the double-binding.
        outputs=[status_output, thumbs_list, download_btn, download_btn]
    )


if __name__ == "__main__":
    demo.launch()