Spaces:

wuhp
/

dataviewer

Sleeping

App Files Files Community

wuhp committed on Jun 22, 2025

Commit

5067213

verified ·

1 Parent(s): 7451f43

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -60

app.py CHANGED Viewed

@@ -3,50 +3,53 @@ import json
 import requests
 import matplotlib.pyplot as plt
 import gradio as gr
 def parse_roboflow_url(url):
     """
     Extract workspace/project and version from a Roboflow Universe URL.
-    Example: https://universe.roboflow.com/airborne-object-detection/airborne-object-detection-4-aod4/dataset/6
     Returns (workspace, project, version)
     """
     pattern = r"roboflow\.com/([^/]+)/([^/]+)/dataset/(\d+)"
     match = re.search(pattern, url)
     if not match:
         raise ValueError(f"Invalid Roboflow dataset URL: {url}")
-    return match.groups()  # (workspace, project, version)
 def fetch_metadata(api_key, workspace, project, version):
     """
     Fetch metadata for a given project version from Roboflow API.
-    Returns total image count and class->count mapping.
     """
     endpoint = f"https://api.roboflow.com/{workspace}/{project}/{version}"
-    resp = requests.get(endpoint, params={"api_key": api_key})
-    resp.raise_for_status()
     data = resp.json()
-    total = data.get('version', {}).get('images') or data.get('project', {}).get('images', 0)
-    classes = data.get('project', {}).get('classes', {})
     return total, classes
 def aggregate_datasets(api_key, entries):
     """
-    Given API key and list of (url, file_name, line_no) tuples,
-    returns total images, aggregated lowercase class counts,
     and per-class source URLs.
-    Raises ValueError with file and line for invalid URLs.
     """
     total_images = 0
     class_counts = {}
     class_sources = {}
     for url, fname, lineno in entries:
-        try:
-            ws, proj, ver = parse_roboflow_url(url)
-        except ValueError:
-            raise ValueError(f"Invalid URL '{url}' in file '{fname}', line {lineno}")
         imgs, cls_map = fetch_metadata(api_key, ws, proj, ver)
         total_images += imgs
         for cls, cnt in cls_map.items():
@@ -61,38 +64,36 @@ def make_bar_chart(counts):
     Return a matplotlib figure showing a bar chart of counts dict.
     """
     fig, ax = plt.subplots()
-    ax.bar(counts.keys(), counts.values())
-    ax.set_xticklabels(counts.keys(), rotation=45, ha='right')
-    ax.set_ylabel('Image Count')
-    ax.set_title('Class Distribution')
     fig.tight_layout()
     return fig
 def load_datasets(api_key, file_objs):
     """
-    Read multiple .txt uploads, parse URLs with file/line info,
-    dedupe URLs, and aggregate metadata. Reports precise errors.
-    Returns: total_images, dataframe_data, plot_fig, json_counts, markdown_sources.
     """
     entries = []
     seen = set()
     for fobj in file_objs:
-        # Determine filename for error reporting
-        fname = getattr(fobj, 'name', None) or fobj.get('name', 'unknown')
-        # Attempt to read raw bytes or retrieve .data
-        raw = None
         try:
             raw = fobj.read()
-        except Exception:
-            raw = fobj.get('data') if isinstance(fobj, dict) else None
         if raw is None and isinstance(fobj, str):
-            with open(fobj, 'rb') as f:
-                raw = f.read()
-        content = raw.decode('utf-8') if isinstance(raw, (bytes, bytearray)) else raw
-        for i, line in enumerate(content.splitlines(), start=1):
             url = line.strip()
             if url and url not in seen:
                 seen.add(url)
@@ -100,10 +101,10 @@ def load_datasets(api_key, file_objs):
     total, counts, sources = aggregate_datasets(api_key, entries)
-    # Prepare DataFrame data
     df_data = [[cls, counts[cls]] for cls in counts]
-    # Prepare clickable sources markdown
     md_lines = []
     for cls in counts:
         links = ", ".join(f"[{s.split('/')[-1]}]({s})" for s in sources[cls])
@@ -116,26 +117,24 @@ def load_datasets(api_key, file_objs):
 def update_classes(df_data):
     """
-    Combine edited classes (merge duplicates, lowercase) and recalc.
-    Returns: total_images, updated_dataframe, plot_fig, json_counts, markdown_summary.
     """
     combined = {}
-    for row in df_data:
-        if not row[0]:
             continue
-        name = row[0].strip().lower()
         try:
-            cnt = int(row[1])
-        except Exception:
-            cnt = 0
-        combined[name] = combined.get(name, 0) + cnt
     total = sum(combined.values())
-    # Build updated DataFrame
-    updated_df = [[cls, combined[cls]] for cls in combined]
     fig = make_bar_chart(combined)
-    md_summary = "\n".join(f"- **{cls}** ({combined[cls]} images)" for cls in combined)
     return str(total), updated_df, fig, json.dumps(combined, indent=2), md_summary
@@ -144,27 +143,30 @@ def build_ui():
         gr.Markdown("## Roboflow Dataset Inspector")
         with gr.Row():
-            api_input = gr.Textbox(label="Roboflow API Key", type="password")
-            files = gr.Files(label="Upload .txt files of Roboflow URLs", file_types=[".txt"])
         load_btn = gr.Button("Load Datasets")
         total_out = gr.Textbox(label="Total Images", interactive=False)
-        df = gr.Dataframe(headers=["Class Name", "Count"], row_count=(1, None), col_count=2, interactive=True)
         plot = gr.Plot()
         json_out = gr.Textbox(label="Counts (JSON)", interactive=False)
         md_out = gr.Markdown(label="Class Sources")
         update_btn = gr.Button("Apply Class Edits")
-        # Load datasets
-        load_btn.click(fn=load_datasets,
-                       inputs=[api_input, files],
-                       outputs=[total_out, df, plot, json_out, md_out])
-        # Apply edits and refresh all outputs (including table)
-        update_btn.click(fn=update_classes,
-                         inputs=[df],
-                         outputs=[total_out, df, plot, json_out, md_out])
     return demo

 import requests
 import matplotlib.pyplot as plt
 import gradio as gr
+from requests.exceptions import HTTPError
 def parse_roboflow_url(url):
     """
     Extract workspace/project and version from a Roboflow Universe URL.
+    Example: https://universe.roboflow.com/.../dataset/6
     Returns (workspace, project, version)
     """
     pattern = r"roboflow\.com/([^/]+)/([^/]+)/dataset/(\d+)"
     match = re.search(pattern, url)
     if not match:
         raise ValueError(f"Invalid Roboflow dataset URL: {url}")
+    return match.groups()
 def fetch_metadata(api_key, workspace, project, version):
     """
     Fetch metadata for a given project version from Roboflow API.
+    Raises ValueError on HTTP errors.
     """
     endpoint = f"https://api.roboflow.com/{workspace}/{project}/{version}"
+    try:
+        resp = requests.get(endpoint, params={"api_key": api_key})
+        resp.raise_for_status()
+    except HTTPError:
+        if resp.status_code == 401:
+            raise ValueError("Unauthorized: check your API key.")
+        else:
+            raise ValueError(f"Error fetching {workspace}/{project}/{version}: {resp.status_code}")
     data = resp.json()
+    total = data.get("version", {}).get("images") or data.get("project", {}).get("images", 0)
+    classes = data.get("project", {}).get("classes", {})
     return total, classes
 def aggregate_datasets(api_key, entries):
     """
+    Given API key and list of (url, file, line) tuples,
+    returns total_images, aggregated lowercase class counts,
     and per-class source URLs.
     """
     total_images = 0
     class_counts = {}
     class_sources = {}
     for url, fname, lineno in entries:
+        ws, proj, ver = parse_roboflow_url(url)
         imgs, cls_map = fetch_metadata(api_key, ws, proj, ver)
         total_images += imgs
         for cls, cnt in cls_map.items():
     Return a matplotlib figure showing a bar chart of counts dict.
     """
     fig, ax = plt.subplots()
+    keys = list(counts.keys())
+    vals = list(counts.values())
+    ax.bar(range(len(keys)), vals)
+    ax.set_xticks(range(len(keys)))
+    ax.set_xticklabels(keys, rotation=45, ha="right")
+    ax.set_ylabel("Image Count")
+    ax.set_title("Class Distribution")
     fig.tight_layout()
     return fig
 def load_datasets(api_key, file_objs):
     """
+    Read uploaded .txt files, dedupe URLs, fetch metadata,
+    and return all outputs for the UI.
     """
     entries = []
     seen = set()
     for fobj in file_objs:
+        fname = getattr(fobj, "name", None) or fobj.get("name", "unknown")
+        # read raw content
         try:
             raw = fobj.read()
+        except:
+            raw = fobj.get("data") if isinstance(fobj, dict) else None
         if raw is None and isinstance(fobj, str):
+            with open(fobj, "rb") as fh:
+                raw = fh.read()
+        text = raw.decode("utf-8") if isinstance(raw, (bytes, bytearray)) else raw
+        for i, line in enumerate(text.splitlines(), start=1):
             url = line.strip()
             if url and url not in seen:
                 seen.add(url)
     total, counts, sources = aggregate_datasets(api_key, entries)
+    # build dataframe list
     df_data = [[cls, counts[cls]] for cls in counts]
+    # build markdown of sources
     md_lines = []
     for cls in counts:
         links = ", ".join(f"[{s.split('/')[-1]}]({s})" for s in sources[cls])
 def update_classes(df_data):
     """
+    Take the edited table rows, merge duplicates (lowercase),
+    and return updated total, df, chart, JSON, and markdown.
     """
     combined = {}
+    for name, cnt in df_data:
+        if not name:
             continue
+        key = name.strip().lower()
         try:
+            val = int(cnt)
+        except:
+            val = 0
+        combined[key] = combined.get(key, 0) + val
     total = sum(combined.values())
+    updated_df = [[k, combined[k]] for k in combined]
     fig = make_bar_chart(combined)
+    md_summary = "\n".join(f"- **{k}** ({combined[k]} images)" for k in combined)
     return str(total), updated_df, fig, json.dumps(combined, indent=2), md_summary
         gr.Markdown("## Roboflow Dataset Inspector")
         with gr.Row():
+            api_input = gr.Textbox(label="API Key", type="password")
+            files = gr.Files(label="Upload .txt files", file_types=[".txt"])
         load_btn = gr.Button("Load Datasets")
         total_out = gr.Textbox(label="Total Images", interactive=False)
+        df = gr.Dataframe(
+            headers=["Class Name", "Count"], row_count=(1, None), col_count=2, interactive=True
+        )
         plot = gr.Plot()
         json_out = gr.Textbox(label="Counts (JSON)", interactive=False)
         md_out = gr.Markdown(label="Class Sources")
         update_btn = gr.Button("Apply Class Edits")
+        load_btn.click(
+            fn=load_datasets,
+            inputs=[api_input, files],
+            outputs=[total_out, df, plot, json_out, md_out],
+        )
+        update_btn.click(
+            fn=update_classes,
+            inputs=[df],
+            outputs=[total_out, df, plot, json_out, md_out],
+        )
     return demo