Tomatillo committed on
Commit
00a6dbb
·
verified ·
1 Parent(s): 3372d59

Updated to include count per class

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +53 -19
src/streamlit_app.py CHANGED
@@ -1,4 +1,22 @@
1
  #!/usr/bin/env python3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  import streamlit as st
4
  import io
@@ -8,6 +26,7 @@ from segments import SegmentsClient
8
  from datetime import datetime
9
  import sys
10
  import os
 
11
  from get_labels_from_samples import (
12
  get_samples as get_samples_objects,
13
  export_frames_and_annotations,
@@ -49,24 +68,28 @@ def parse_classes(input_str: str) -> list:
49
  return sorted(set(classes))
50
 
51
 
52
- def _count_from_frames(frames, target_set):
53
- """Helper to count frames, total annotations, and matching annotations directly."""
54
  if not frames:
55
- return 0, 0, 0
56
  num_frames = len(frames)
57
  total_annotations = 0
58
  matching_annotations = 0
 
59
  for f in frames:
60
  anns = getattr(f, 'annotations', [])
61
  total_annotations += len(anns)
62
  if target_set:
63
  for ann in anns:
64
- if getattr(ann, 'category_id', None) in target_set:
 
65
  matching_annotations += 1
66
- return num_frames, total_annotations, matching_annotations
 
 
67
 
68
 
69
- def compute_metrics_for_sample(sample, api_key, target_set, is_multisensor, sensor_select):
70
  """
71
  Fetch label for a single sample and compute metrics.
72
  Returns a list of metric dicts (one per sensor if 'All sensors', otherwise one).
@@ -87,7 +110,7 @@ def compute_metrics_for_sample(sample, api_key, target_set, is_multisensor, sens
87
  for sensor in sensors:
88
  if getattr(sensor, 'name', None) == sensor_select:
89
  frames = getattr(getattr(sensor, 'attributes', None), 'frames', [])
90
- num_frames, total_annotations, matching_annotations = _count_from_frames(frames, target_set)
91
  metrics_rows.append({
92
  'name': getattr(sample, 'name', sample.uuid),
93
  'uuid': sample.uuid,
@@ -96,6 +119,7 @@ def compute_metrics_for_sample(sample, api_key, target_set, is_multisensor, sens
96
  'num_frames': num_frames,
97
  'total_annotations': total_annotations,
98
  'matching_annotations': matching_annotations,
 
99
  'labeled_by': labeled_by,
100
  'reviewed_by': reviewed_by
101
  })
@@ -105,7 +129,7 @@ def compute_metrics_for_sample(sample, api_key, target_set, is_multisensor, sens
105
  for sensor in sensors:
106
  sensor_name = getattr(sensor, 'name', 'Unknown')
107
  frames = getattr(getattr(sensor, 'attributes', None), 'frames', [])
108
- num_frames, total_annotations, matching_annotations = _count_from_frames(frames, target_set)
109
  metrics_rows.append({
110
  'name': getattr(sample, 'name', sample.uuid),
111
  'uuid': sample.uuid,
@@ -114,13 +138,14 @@ def compute_metrics_for_sample(sample, api_key, target_set, is_multisensor, sens
114
  'num_frames': num_frames,
115
  'total_annotations': total_annotations,
116
  'matching_annotations': matching_annotations,
 
117
  'labeled_by': labeled_by,
118
  'reviewed_by': reviewed_by
119
  })
120
  else:
121
  # single-sensor dataset
122
  frames = getattr(getattr(label, 'attributes', None), 'frames', [])
123
- num_frames, total_annotations, matching_annotations = _count_from_frames(frames, target_set)
124
  metrics_rows.append({
125
  'name': getattr(sample, 'name', sample.uuid),
126
  'uuid': sample.uuid,
@@ -129,6 +154,7 @@ def compute_metrics_for_sample(sample, api_key, target_set, is_multisensor, sens
129
  'num_frames': num_frames,
130
  'total_annotations': total_annotations,
131
  'matching_annotations': matching_annotations,
 
132
  'labeled_by': labeled_by,
133
  'reviewed_by': reviewed_by
134
  })
@@ -138,7 +164,7 @@ def compute_metrics_for_sample(sample, api_key, target_set, is_multisensor, sens
138
  return []
139
 
140
 
141
- def generate_csv(metrics: list, dataset_identifier: str) -> str:
142
  """
143
  Generate CSV content from list of per-sample metrics.
144
  Columns: name, sample_url, sensor, num_frames, total_annotations,
@@ -146,19 +172,26 @@ def generate_csv(metrics: list, dataset_identifier: str) -> str:
146
  """
147
  output = io.StringIO()
148
  writer = csv.writer(output)
149
- writer.writerow([
150
  'name', 'sample_url', 'sensor', 'num_frames',
151
- 'total_annotations', 'matching_annotations',
152
- 'labeled_by', 'reviewed_by'
153
- ])
 
 
 
154
  for m in metrics:
155
  url = f"https://app.segments.ai/{dataset_identifier}/samples/{m['uuid']}/{m['labelset']}"
156
- writer.writerow([
157
  m['name'], url, m['sensor'],
158
  m['num_frames'], m['total_annotations'],
159
- m['matching_annotations'], m['labeled_by'],
160
- m['reviewed_by']
161
- ])
 
 
 
 
162
  content = output.getvalue()
163
  output.close()
164
  return content
@@ -234,6 +267,7 @@ if run_button:
234
  sample,
235
  api_key,
236
  target_set,
 
237
  is_multisensor,
238
  sensor_select,
239
  )
@@ -249,7 +283,7 @@ if run_button:
249
  if not metrics:
250
  st.session_state.error = "No metrics could be generated for the dataset."
251
  else:
252
- st.session_state.csv_content = generate_csv(metrics, dataset_identifier)
253
  if status_ctx is not None:
254
  status_ctx.update(label="CSV generated!", state="complete")
255
  except Exception as e:
 
1
  #!/usr/bin/env python3
2
+ """
3
+ File: count_by_class.py
4
+ Location: 6_Data_metrics/count_by_class.py
5
+
6
+ Description:
7
+ Streamlit application to count and report metrics per sample for specified classes.
8
+ For each sample, outputs:
9
+ - Sample name
10
+ - Sample URL (including labelset)
11
+ - Number of frames (ignoring a 31st frame if present)
12
+ - Total number of annotations
13
+ - Number of annotations matching any of the user-specified classes
14
+ - Labeled by (from label data)
15
+ - Reviewed by (from label data)
16
+
17
+ Usage:
18
+ streamlit run 6_Data_metrics/count_by_class.py
19
+ """
20
 
21
  import streamlit as st
22
  import io
 
26
  from datetime import datetime
27
  import sys
28
  import os
29
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../0_label_scripts")))
30
  from get_labels_from_samples import (
31
  get_samples as get_samples_objects,
32
  export_frames_and_annotations,
 
68
  return sorted(set(classes))
69
 
70
 
71
+ def _count_from_frames(frames, target_set, class_ids):
72
+ """Helper to count frames, totals, and per-class counts directly."""
73
  if not frames:
74
+ return 0, 0, 0, {cid: 0 for cid in class_ids}
75
  num_frames = len(frames)
76
  total_annotations = 0
77
  matching_annotations = 0
78
+ class_counts = {cid: 0 for cid in class_ids}
79
  for f in frames:
80
  anns = getattr(f, 'annotations', [])
81
  total_annotations += len(anns)
82
  if target_set:
83
  for ann in anns:
84
+ cid = getattr(ann, 'category_id', None)
85
+ if cid in target_set:
86
  matching_annotations += 1
87
+ if cid in class_counts:
88
+ class_counts[cid] += 1
89
+ return num_frames, total_annotations, matching_annotations, class_counts
90
 
91
 
92
+ def compute_metrics_for_sample(sample, api_key, target_set, class_ids, is_multisensor, sensor_select):
93
  """
94
  Fetch label for a single sample and compute metrics.
95
  Returns a list of metric dicts (one per sensor if 'All sensors', otherwise one).
 
110
  for sensor in sensors:
111
  if getattr(sensor, 'name', None) == sensor_select:
112
  frames = getattr(getattr(sensor, 'attributes', None), 'frames', [])
113
+ num_frames, total_annotations, matching_annotations, class_counts = _count_from_frames(frames, target_set, class_ids)
114
  metrics_rows.append({
115
  'name': getattr(sample, 'name', sample.uuid),
116
  'uuid': sample.uuid,
 
119
  'num_frames': num_frames,
120
  'total_annotations': total_annotations,
121
  'matching_annotations': matching_annotations,
122
+ 'class_counts': class_counts,
123
  'labeled_by': labeled_by,
124
  'reviewed_by': reviewed_by
125
  })
 
129
  for sensor in sensors:
130
  sensor_name = getattr(sensor, 'name', 'Unknown')
131
  frames = getattr(getattr(sensor, 'attributes', None), 'frames', [])
132
+ num_frames, total_annotations, matching_annotations, class_counts = _count_from_frames(frames, target_set, class_ids)
133
  metrics_rows.append({
134
  'name': getattr(sample, 'name', sample.uuid),
135
  'uuid': sample.uuid,
 
138
  'num_frames': num_frames,
139
  'total_annotations': total_annotations,
140
  'matching_annotations': matching_annotations,
141
+ 'class_counts': class_counts,
142
  'labeled_by': labeled_by,
143
  'reviewed_by': reviewed_by
144
  })
145
  else:
146
  # single-sensor dataset
147
  frames = getattr(getattr(label, 'attributes', None), 'frames', [])
148
+ num_frames, total_annotations, matching_annotations, class_counts = _count_from_frames(frames, target_set, class_ids)
149
  metrics_rows.append({
150
  'name': getattr(sample, 'name', sample.uuid),
151
  'uuid': sample.uuid,
 
154
  'num_frames': num_frames,
155
  'total_annotations': total_annotations,
156
  'matching_annotations': matching_annotations,
157
+ 'class_counts': class_counts,
158
  'labeled_by': labeled_by,
159
  'reviewed_by': reviewed_by
160
  })
 
164
  return []
165
 
166
 
167
def generate_csv(metrics: list, dataset_identifier: str, target_classes: list[int]) -> str:
    """Render per-sample metrics as CSV text.

    Fixed columns (name, sample_url, sensor, num_frames,
    total_annotations, matching_annotations) are followed by one
    ``class_<id>`` column per entry in ``target_classes``, then
    labeled_by and reviewed_by.

    Args:
        metrics: list of per-sample metric dicts (see the keys used below).
        dataset_identifier: ``owner/dataset`` slug used to build sample URLs.
        target_classes: class ids, in the column order to emit.

    Returns:
        The full CSV document as a single string.
    """
    buffer = io.StringIO()
    writer = csv.writer(buffer)

    fixed_columns = ['name', 'sample_url', 'sensor', 'num_frames',
                     'total_annotations', 'matching_annotations']
    class_columns = [f'class_{cid}' for cid in target_classes]
    writer.writerow(fixed_columns + class_columns + ['labeled_by', 'reviewed_by'])

    for metric in metrics:
        sample_url = f"https://app.segments.ai/{dataset_identifier}/samples/{metric['uuid']}/{metric['labelset']}"
        # Per-class counts emitted in the same order as the header columns;
        # missing ids default to 0.
        counts = metric.get('class_counts', {})
        writer.writerow(
            [metric['name'], sample_url, metric['sensor'],
             metric['num_frames'], metric['total_annotations'],
             metric['matching_annotations']]
            + [counts.get(cid, 0) for cid in target_classes]
            + [metric['labeled_by'], metric['reviewed_by']]
        )

    content = buffer.getvalue()
    buffer.close()
    return content
 
267
  sample,
268
  api_key,
269
  target_set,
270
+ target_classes,
271
  is_multisensor,
272
  sensor_select,
273
  )
 
283
  if not metrics:
284
  st.session_state.error = "No metrics could be generated for the dataset."
285
  else:
286
+ st.session_state.csv_content = generate_csv(metrics, dataset_identifier, target_classes)
287
  if status_ctx is not None:
288
  status_ctx.update(label="CSV generated!", state="complete")
289
  except Exception as e: