Spaces:
Sleeping
Sleeping
Lisa Dunlap committed on
Commit ·
6334bb3
1
Parent(s): fb9b9d1
updated help page
Browse files- stringsight/dashboard/app.py +3 -3
- stringsight/dashboard/utils.py +45 -46
stringsight/dashboard/app.py
CHANGED
|
@@ -341,11 +341,11 @@ def create_app() -> gr.Blocks:
|
|
| 341 |
with gr.Group(visible=False, elem_id="help-panel") as help_panel:
|
| 342 |
help_md = gr.Markdown(
|
| 343 |
"""
|
| 344 |
-
**Overview**: Compare model
|
| 345 |
|
| 346 |
-
**View Clusters**: Explore
|
| 347 |
|
| 348 |
-
**View Examples**: Inspect individual examples
|
| 349 |
"""
|
| 350 |
)
|
| 351 |
help_close_btn = gr.Button("Close", variant="secondary", elem_id="help-close-btn")
|
|
|
|
| 341 |
with gr.Group(visible=False, elem_id="help-panel") as help_panel:
|
| 342 |
help_md = gr.Markdown(
|
| 343 |
"""
|
| 344 |
+
**Overview**: Compare model metrics and what behaviors are more unique to each model.
|
| 345 |
|
| 346 |
+
**View Clusters**: Explore the individual behaviors seen in each property cluster. To view the example for a behavior, copy its property description into the search on the examples tab.
|
| 347 |
|
| 348 |
+
**View Examples**: Inspect individual examples and localize each example to the property cluster that it belongs to.
|
| 349 |
"""
|
| 350 |
)
|
| 351 |
help_close_btn = gr.Button("Close", variant="secondary", elem_id="help-close-btn")
|
stringsight/dashboard/utils.py
CHANGED
|
@@ -1706,6 +1706,7 @@ def get_example_data(
|
|
| 1706 |
"response": response_val,
|
| 1707 |
"property_description": row.get("property_description", "N/A"),
|
| 1708 |
"score": row.get("score", "N/A"),
|
|
|
|
| 1709 |
"fine_cluster_id": fine_cluster_id,
|
| 1710 |
"fine_cluster_label": fine_cluster_label,
|
| 1711 |
"coarse_cluster_id": coarse_cluster_id,
|
|
@@ -1850,26 +1851,7 @@ def format_examples_display(examples: List[Dict[str, Any]],
|
|
| 1850 |
)
|
| 1851 |
|
| 1852 |
# Score display for summary (only for non-side-by-side or when not shown in side-by-side)
|
| 1853 |
-
score_badge
|
| 1854 |
-
if not example.get('is_side_by_side', False) and example['score'] != 'N/A':
|
| 1855 |
-
try:
|
| 1856 |
-
score_val = float(example['score'])
|
| 1857 |
-
score_color = '#28a745' if score_val >= 0 else '#dc3545'
|
| 1858 |
-
score_badge = f"""
|
| 1859 |
-
<span style="
|
| 1860 |
-
background: {score_color};
|
| 1861 |
-
color: white;
|
| 1862 |
-
padding: 4px 8px;
|
| 1863 |
-
border-radius: 12px;
|
| 1864 |
-
font-size: 12px;
|
| 1865 |
-
font-weight: bold;
|
| 1866 |
-
margin-left: 10px;
|
| 1867 |
-
">
|
| 1868 |
-
Score: {score_val:.3f}
|
| 1869 |
-
</span>
|
| 1870 |
-
"""
|
| 1871 |
-
except:
|
| 1872 |
-
pass
|
| 1873 |
|
| 1874 |
# Create short preview of prompt for summary
|
| 1875 |
prompt_preview = example['prompt'][:80] + "..." if len(example['prompt']) > 80 else example['prompt']
|
|
@@ -1878,30 +1860,48 @@ def format_examples_display(examples: List[Dict[str, Any]],
|
|
| 1878 |
# First example is expanded by default
|
| 1879 |
open_attr = "open" if i == 1 else ""
|
| 1880 |
|
| 1881 |
-
# Build
|
| 1882 |
-
|
| 1883 |
-
|
| 1884 |
-
|
| 1885 |
-
|
| 1886 |
-
|
| 1887 |
-
|
| 1888 |
-
|
| 1889 |
-
|
| 1890 |
-
|
| 1891 |
-
|
| 1892 |
-
|
| 1893 |
-
|
| 1894 |
-
|
| 1895 |
-
|
| 1896 |
-
|
| 1897 |
-
|
| 1898 |
-
|
| 1899 |
-
|
| 1900 |
-
|
| 1901 |
-
|
| 1902 |
-
|
| 1903 |
-
|
| 1904 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1905 |
|
| 1906 |
html_out += f"""
|
| 1907 |
<details {open_attr} style="border: 1px solid #dee2e6; border-radius: 8px; margin-bottom: 15px; background: white; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
|
|
@@ -1932,6 +1932,7 @@ def format_examples_display(examples: List[Dict[str, Any]],
|
|
| 1932 |
<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#f3f4f6; border:1px solid #e5e7eb;">ID: {html.escape(str(example['id']))}</span>
|
| 1933 |
<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#f3f4f6; border:1px solid #e5e7eb;">Model: {html.escape(str(example['model']))}</span>
|
| 1934 |
{tag_badge}
|
|
|
|
| 1935 |
{(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#ecfdf5; color:#047857; border:1px solid #bbf7d0;">Category: {html.escape(str(example["category"]))}</span>' if example["category"] not in [None, "N/A", "None", "", "null"] and str(example["category"]).strip() != "" else '')}
|
| 1936 |
{(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#eff6ff; color:#1d4ed8; border:1px solid #dbeafe;">Type: {html.escape(str(example["type"]))}</span>' if example["type"] not in [None, "N/A", "None", "", "null"] and str(example["type"]).strip() != "" else '')}
|
| 1937 |
{(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#fff7ed; color:#c2410c; border:1px solid #fed7aa;">Impact: {html.escape(str(example["impact"]))}</span>' if example["impact"] not in [None, "N/A", "None", "", "null"] and str(example["impact"]).strip() != "" else '')}
|
|
@@ -1956,8 +1957,6 @@ def format_examples_display(examples: List[Dict[str, Any]],
|
|
| 1956 |
example.get("evidence") not in [None, "N/A", "None", "", "null"] and str(example.get("evidence", "")).strip() != "",
|
| 1957 |
]) else ''}
|
| 1958 |
|
| 1959 |
-
{score_section_html}
|
| 1960 |
-
|
| 1961 |
<div style="margin-bottom: 15px;">
|
| 1962 |
<div style="border-radius: 6px; font-size: 15px; line-height: 1.5;">
|
| 1963 |
{conversation_html}
|
|
|
|
| 1706 |
"response": response_val,
|
| 1707 |
"property_description": row.get("property_description", "N/A"),
|
| 1708 |
"score": row.get("score", "N/A"),
|
| 1709 |
+
"scores": row.get("scores", None),
|
| 1710 |
"fine_cluster_id": fine_cluster_id,
|
| 1711 |
"fine_cluster_label": fine_cluster_label,
|
| 1712 |
"coarse_cluster_id": coarse_cluster_id,
|
|
|
|
| 1851 |
)
|
| 1852 |
|
| 1853 |
# Score display for summary (only for non-side-by-side or when not shown in side-by-side)
|
| 1854 |
+
# (Removed) score_badge in header; scores will be shown in badges row instead
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1855 |
|
| 1856 |
# Create short preview of prompt for summary
|
| 1857 |
prompt_preview = example['prompt'][:80] + "..." if len(example['prompt']) > 80 else example['prompt']
|
|
|
|
| 1860 |
# First example is expanded by default
|
| 1861 |
open_attr = "open" if i == 1 else ""
|
| 1862 |
|
| 1863 |
+
# Build score chips for badges row (show scalar score and/or scores dict), excluding side-by-side
|
| 1864 |
+
score_chips_html = ""
|
| 1865 |
+
if not example.get('is_side_by_side', False):
|
| 1866 |
+
chips: list[str] = []
|
| 1867 |
+
# Scalar score
|
| 1868 |
+
raw_score = example.get('score')
|
| 1869 |
+
if isinstance(raw_score, (int, float)):
|
| 1870 |
+
sv = float(raw_score)
|
| 1871 |
+
bg = '#ecfdf5' if sv >= 0 else '#fee2e2'
|
| 1872 |
+
fg = '#047857' if sv >= 0 else '#991b1b'
|
| 1873 |
+
chips.append(
|
| 1874 |
+
f"<span style=\"display:inline-block; padding:2px 8px; border-radius:999px; background:{bg}; color:{fg}; border:1px solid #e5e7eb;\">Score: {sv:.3f}</span>"
|
| 1875 |
+
)
|
| 1876 |
+
elif isinstance(raw_score, str):
|
| 1877 |
+
s = raw_score.strip() if raw_score is not None else ""
|
| 1878 |
+
if s and re.match(r"^[+-]?\d+(?:\.\d+)?$", s):
|
| 1879 |
+
sv = float(s)
|
| 1880 |
+
bg = '#ecfdf5' if sv >= 0 else '#fee2e2'
|
| 1881 |
+
fg = '#047857' if sv >= 0 else '#991b1b'
|
| 1882 |
+
chips.append(
|
| 1883 |
+
f"<span style=\"display:inline-block; padding:2px 8px; border-radius:999px; background:{bg}; color:{fg}; border:1px solid #e5e7eb;\">Score: {sv:.3f}</span>"
|
| 1884 |
+
)
|
| 1885 |
+
# Scores dict
|
| 1886 |
+
raw_scores = example.get('scores')
|
| 1887 |
+
if isinstance(raw_scores, dict):
|
| 1888 |
+
for k, v in raw_scores.items():
|
| 1889 |
+
label = str(k)
|
| 1890 |
+
val: Optional[float] = None
|
| 1891 |
+
if isinstance(v, (int, float)):
|
| 1892 |
+
val = float(v)
|
| 1893 |
+
elif isinstance(v, str):
|
| 1894 |
+
vs = v.strip()
|
| 1895 |
+
if vs and re.match(r"^[+-]?\d+(?:\.\d+)?$", vs):
|
| 1896 |
+
val = float(vs)
|
| 1897 |
+
if val is not None:
|
| 1898 |
+
bg = '#ecfdf5' if val >= 0 else '#fee2e2'
|
| 1899 |
+
fg = '#047857' if val >= 0 else '#991b1b'
|
| 1900 |
+
chips.append(
|
| 1901 |
+
f"<span style=\"display:inline-block; padding:2px 8px; border-radius:999px; background:{bg}; color:{fg}; border:1px solid #e5e7eb;\">{html.escape(label)}: {val:.3f}</span>"
|
| 1902 |
+
)
|
| 1903 |
+
if chips:
|
| 1904 |
+
score_chips_html = "".join(chips)
|
| 1905 |
|
| 1906 |
html_out += f"""
|
| 1907 |
<details {open_attr} style="border: 1px solid #dee2e6; border-radius: 8px; margin-bottom: 15px; background: white; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
|
|
|
|
| 1932 |
<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#f3f4f6; border:1px solid #e5e7eb;">ID: {html.escape(str(example['id']))}</span>
|
| 1933 |
<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#f3f4f6; border:1px solid #e5e7eb;">Model: {html.escape(str(example['model']))}</span>
|
| 1934 |
{tag_badge}
|
| 1935 |
+
{score_chips_html}
|
| 1936 |
{(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#ecfdf5; color:#047857; border:1px solid #bbf7d0;">Category: {html.escape(str(example["category"]))}</span>' if example["category"] not in [None, "N/A", "None", "", "null"] and str(example["category"]).strip() != "" else '')}
|
| 1937 |
{(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#eff6ff; color:#1d4ed8; border:1px solid #dbeafe;">Type: {html.escape(str(example["type"]))}</span>' if example["type"] not in [None, "N/A", "None", "", "null"] and str(example["type"]).strip() != "" else '')}
|
| 1938 |
{(f'<span style="display:inline-block; padding:2px 8px; border-radius:999px; background:#fff7ed; color:#c2410c; border:1px solid #fed7aa;">Impact: {html.escape(str(example["impact"]))}</span>' if example["impact"] not in [None, "N/A", "None", "", "null"] and str(example["impact"]).strip() != "" else '')}
|
|
|
|
| 1957 |
example.get("evidence") not in [None, "N/A", "None", "", "null"] and str(example.get("evidence", "")).strip() != "",
|
| 1958 |
]) else ''}
|
| 1959 |
|
|
|
|
|
|
|
| 1960 |
<div style="margin-bottom: 15px;">
|
| 1961 |
<div style="border-radius: 6px; font-size: 15px; line-height: 1.5;">
|
| 1962 |
{conversation_html}
|