pappitti committed on
Commit
f263c36
·
1 Parent(s): e1d9d8f

upgrade for v3 of dataset, adding family and provider filters, improving responsiveness. Still needs polishing

Browse files
README.md CHANGED
@@ -34,9 +34,9 @@ This application explores datasets derived from xlr8harder's [Speechmap](https:/
34
 
35
  The underlying dataset from HuggingFace includes:
36
  * **2.4k questions**: [speechmap-questions](https://huggingface.co/datasets/PITTI/speechmap-questions)
37
- * **336k responses**: [speechmap-responses](https://huggingface.co/datasets/PITTI/speechmap-responses-v2)
38
- * **875k LLM-judge assessments**: [speechmap-assessments](https://huggingface.co/datasets/PITTI/speechmap-assessments-v2)
39
- * The assessment dataset combines the original assessments from `gpt-4o`, a new set from `mistral-small-3.1-2503`, a new set from `mistral-small-3.2-2506` and some manual annotations.
40
 
41
  ## Quick Start
42
 
 
34
 
35
  The underlying dataset from HuggingFace includes:
36
  * **2.4k questions**: [speechmap-questions](https://huggingface.co/datasets/PITTI/speechmap-questions)
37
+ * **369k responses**: [speechmap-responses](https://huggingface.co/datasets/PITTI/speechmap-responses-v3)
38
+ * **2.07M LLM-judge assessments**: [speechmap-assessments](https://huggingface.co/datasets/PITTI/speechmap-assessments-v3)
39
+ * The assessment dataset combines the original assessments from the Speechmap project by `gpt-4o`, assessments by `mistral-small-3.1-2503`, `mistral-small-3.2-2506`, `gemma3-27b-it`, `deepseek-v3.2`, `qwen3-next-80B-A3B-instruct` and manual annotations.
40
 
41
  ## Quick Start
42
 
api/mismatches.ts CHANGED
@@ -12,6 +12,8 @@ export default async function handler(req: IncomingMessage, res: ServerResponse)
12
  const j2_compliance = url.searchParams.get('toCategory');
13
  const theme = url.searchParams.get('theme') || null;
14
  const model = url.searchParams.get('model') || null;
 
 
15
 
16
  if (!judge1 || !j1_compliance || !judge2 || !j2_compliance || !judge1Classification || !judge2Classification) {
17
  return jsonResponse(res, 400, { error: 'judge1, j1_compliance, judge2, and j2_compliance are required.' });
@@ -22,47 +24,6 @@ export default async function handler(req: IncomingMessage, res: ServerResponse)
22
  }
23
 
24
  try {
25
- // const sql = `
26
- // WITH MismatchedResponses AS (
27
- // SELECT a.r_uuid
28
- // FROM assessments a
29
- // JOIN responses r ON a.r_uuid = r.uuid
30
- // JOIN questions q ON r.q_uuid = q.uuid
31
- // WHERE
32
- // a.judge IN (?, ?) AND
33
- // (? IS NULL OR q.theme = ?)
34
- // GROUP BY a.r_uuid
35
- // HAVING
36
- // SUM(CASE WHEN a.judge = ? AND a.${judge1Classification} = ? THEN 1 ELSE 0 END) > 0
37
- // AND
38
- // SUM(CASE WHEN a.judge = ? AND a.${judge2Classification} = ? THEN 1 ELSE 0 END) > 0
39
- // )
40
- // SELECT
41
- // r.uuid as r_uuid,
42
- // q.question,
43
- // q.theme as question_theme,
44
- // q.domain as question_domain,
45
- // r.model as response_model,
46
- // r.content as response_content,
47
- // a.judge,
48
- // a.${judge1Classification},
49
- // ${judge1Classification!=judge2Classification? `a.${judge2Classification}`:''}
50
- // a.judge_analysis
51
- // FROM MismatchedResponses mr
52
- // JOIN responses r ON mr.r_uuid = r.uuid
53
- // JOIN questions q ON r.q_uuid = q.uuid
54
- // JOIN assessments a ON mr.r_uuid = a.r_uuid
55
- // WHERE
56
- // a.judge IN (?, ?) -- Only get assessments from the two judges in question
57
- // ORDER BY r.uuid;
58
- // `;
59
- // const params = [
60
- // judge1, judge2,
61
- // theme, theme,
62
- // judge1, j1_compliance,
63
- // judge2, j2_compliance,
64
- // judge1, judge2
65
- // ];
66
 
67
  const sql = `
68
  SELECT
@@ -72,6 +33,7 @@ export default async function handler(req: IncomingMessage, res: ServerResponse)
72
  q.theme AS question_theme,
73
  q.domain AS question_domain,
74
  r.model AS response_model,
 
75
  r.content AS response_content,
76
 
77
  a1.judge AS judge1_name,
@@ -89,6 +51,7 @@ export default async function handler(req: IncomingMessage, res: ServerResponse)
89
  -- Join to get response and question details
90
  JOIN responses r ON a1.r_uuid = r.uuid
91
  JOIN questions q ON r.q_uuid = q.uuid
 
92
 
93
  WHERE
94
  -- Filter for the first judge's specific assessment
@@ -97,15 +60,18 @@ export default async function handler(req: IncomingMessage, res: ServerResponse)
97
  -- Filter for the second judge's specific assessment
98
  AND a2.judge = ? AND a2.${judge2Classification} = ?
99
 
100
- -- Optional theme filter and model filter
101
- AND (? IS NULL OR q.theme = ?) AND (? IS NULL OR r.model = ?);
 
102
  `;
103
 
104
  const params = [
105
  judge1, j1_compliance,
106
  judge2, j2_compliance,
107
  theme, theme,
108
- model, model
 
 
109
  ];
110
 
111
  const rows = await db.query<any>(sql, ...params);
@@ -122,6 +88,7 @@ export default async function handler(req: IncomingMessage, res: ServerResponse)
122
  model: row.response_model,
123
  r_uuid: row.r_uuid,
124
  response: row.response_content,
 
125
  assessments: {},
126
  });
127
  }
 
12
  const j2_compliance = url.searchParams.get('toCategory');
13
  const theme = url.searchParams.get('theme') || null;
14
  const model = url.searchParams.get('model') || null;
15
+ const modelFamily = url.searchParams.get('modelFamily') || null;
16
+ const apiProvider = url.searchParams.get('provider') || null;
17
 
18
  if (!judge1 || !j1_compliance || !judge2 || !j2_compliance || !judge1Classification || !judge2Classification) {
19
  return jsonResponse(res, 400, { error: 'judge1, j1_compliance, judge2, and j2_compliance are required.' });
 
24
  }
25
 
26
  try {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  const sql = `
29
  SELECT
 
33
  q.theme AS question_theme,
34
  q.domain AS question_domain,
35
  r.model AS response_model,
36
+ r.provider AS provider,
37
  r.content AS response_content,
38
 
39
  a1.judge AS judge1_name,
 
51
  -- Join to get response and question details
52
  JOIN responses r ON a1.r_uuid = r.uuid
53
  JOIN questions q ON r.q_uuid = q.uuid
54
+ JOIN models m ON r.model = m.name
55
 
56
  WHERE
57
  -- Filter for the first judge's specific assessment
 
60
  -- Filter for the second judge's specific assessment
61
  AND a2.judge = ? AND a2.${judge2Classification} = ?
62
 
63
+ -- Optional theme, model, family and provider filter
64
+ AND (? IS NULL OR q.theme = ?) AND (? IS NULL OR r.model = ?)
65
+ AND (? IS NULL OR m.family = ?) AND (? IS NULL OR r.provider = ?);
66
  `;
67
 
68
  const params = [
69
  judge1, j1_compliance,
70
  judge2, j2_compliance,
71
  theme, theme,
72
+ model, model,
73
+ modelFamily, modelFamily,
74
+ apiProvider, apiProvider
75
  ];
76
 
77
  const rows = await db.query<any>(sql, ...params);
 
88
  model: row.response_model,
89
  r_uuid: row.r_uuid,
90
  response: row.response_content,
91
+ provider: row.provider,
92
  assessments: {},
93
  });
94
  }
api/models_families.ts ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { IncomingMessage, ServerResponse } from 'http';
2
+ import db from '../src/lib/db.js';
3
+ import type { ModelFamily } from '../src/types.js';
4
+ import { jsonResponse } from './utils.js';
5
+
6
+
7
+ export default async function handler(_req: IncomingMessage, res: ServerResponse) {
8
+ try {
9
+ const sql = 'SELECT DISTINCT family FROM models ORDER BY family ASC';
10
+ const modelFamilies = await db.query<ModelFamily>(sql);
11
+ jsonResponse(res, 200, modelFamilies);
12
+ } catch (error) {
13
+ console.error('Failed to fetch model families:', error);
14
+ jsonResponse(res, 500, { error: 'Failed to fetch model families' });
15
+ }
16
+ }
api/providers.ts ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { IncomingMessage, ServerResponse } from 'http';
2
+ import db from '../src/lib/db.js';
3
+ import type { Provider } from '../src/types.js';
4
+ import { jsonResponse } from './utils.js';
5
+
6
+ export default async function handler(_req: IncomingMessage, res: ServerResponse) {
7
+ try {
8
+ const sql = 'SELECT DISTINCT provider FROM responses ORDER BY provider ASC';
9
+ const providers = await db.query<Provider>(sql);
10
+ jsonResponse(res, 200, providers);
11
+ } catch (error) {
12
+ console.error('Failed to fetch providers:', error);
13
+ jsonResponse(res, 500, { error: 'Failed to fetch providers' });
14
+ }
15
+ }
api/reclassification.ts CHANGED
@@ -10,6 +10,8 @@ export default async function handler(req: IncomingMessage, res: ServerResponse)
10
  const judge2Classification = url.searchParams.get('judge2Classification');
11
  const theme = url.searchParams.get('theme') || null;
12
  const model = url.searchParams.get('model') || null;
 
 
13
 
14
  if (!judge1 || !judge1Classification || !judge2 || !judge2Classification) {
15
  return jsonResponse(res, 400, { error: 'Query parameters judge1, judge1Classification, judge2, and judge2Classification are required.' });
@@ -29,14 +31,16 @@ export default async function handler(req: IncomingMessage, res: ServerResponse)
29
  JOIN assessments a2 ON a1.r_uuid = a2.r_uuid
30
  JOIN responses r ON a1.r_uuid = r.uuid
31
  JOIN questions q ON r.q_uuid = q.uuid
 
32
  WHERE
33
- a1.judge = ? AND a2.judge = ? AND (? IS NULL OR q.theme = ?) AND (? IS NULL OR r.model = ?)
 
34
  GROUP BY
35
  judge1_compliance,
36
  judge2_compliance;
37
  `;
38
 
39
- const params = [judge1, judge2, theme, theme, model, model];
40
 
41
  const rows = await db.query<{ judge1_compliance: string, judge2_compliance: string, count: number }>(sql, ...params);
42
 
 
10
  const judge2Classification = url.searchParams.get('judge2Classification');
11
  const theme = url.searchParams.get('theme') || null;
12
  const model = url.searchParams.get('model') || null;
13
+ const modelFamily = url.searchParams.get('modelFamily') || null;
14
+ const apiProvider = url.searchParams.get('provider') || null;
15
 
16
  if (!judge1 || !judge1Classification || !judge2 || !judge2Classification) {
17
  return jsonResponse(res, 400, { error: 'Query parameters judge1, judge1Classification, judge2, and judge2Classification are required.' });
 
31
  JOIN assessments a2 ON a1.r_uuid = a2.r_uuid
32
  JOIN responses r ON a1.r_uuid = r.uuid
33
  JOIN questions q ON r.q_uuid = q.uuid
34
+ JOIN models m ON r.model = m.name
35
  WHERE
36
+ a1.judge = ? AND a2.judge = ? AND (? IS NULL OR q.theme = ?)
37
+ AND (? IS NULL OR r.model = ?) AND (? IS NULL OR m.family = ?) AND (? IS NULL OR r.provider = ?)
38
  GROUP BY
39
  judge1_compliance,
40
  judge2_compliance;
41
  `;
42
 
43
+ const params = [judge1, judge2, theme, theme, model, model, modelFamily, modelFamily, apiProvider, apiProvider];
44
 
45
  const rows = await db.query<{ judge1_compliance: string, judge2_compliance: string, count: number }>(sql, ...params);
46
 
api/reclassified_list.ts DELETED
@@ -1,48 +0,0 @@
1
- import type { IncomingMessage, ServerResponse } from 'http';
2
- import db from '../src/lib/db.js';
3
- import { jsonResponse } from './utils.js';
4
-
5
- // This API endpoint fetches reclassified items by response uuid based on the judges and theme
6
- export default async function handler(req: IncomingMessage, res: ServerResponse) {
7
- // parsing query parameters from the URL
8
- const url = new URL(req.url!, `http://${req.headers.host}`);
9
- const judge1 = url.searchParams.get('judge1');
10
- const judge2 = url.searchParams.get('judge2');
11
- const theme = url.searchParams.get('theme') || null;
12
-
13
- if (!judge1 || !judge2) {
14
- return jsonResponse(res, 400, { error: 'judge1 and judge2 query parameters are required.' });
15
- }
16
-
17
- try {
18
- const sql = `
19
- SELECT a1.compliance AS judge1_compliance, a2.compliance AS judge2_compliance, r.uuid as r_uuid
20
- FROM assessments a1
21
- JOIN assessments a2 ON a1.r_uuid = a2.r_uuid
22
- JOIN responses r ON a1.r_uuid = r.uuid
23
- JOIN questions q ON r.q_uuid = q.uuid
24
- WHERE
25
- a1.judge = ? AND a2.judge = ? AND (? IS NULL OR q.theme = ?)
26
- ORDER BY r.uuid`;
27
-
28
- const rows = await db.query<{ judge1_compliance: string, judge2_compliance: string, r_uuid: string }>(
29
- sql, judge1, judge2, theme, theme
30
- );
31
-
32
- // matrix logic
33
- const transitionMatrix: Record<string, Record<string, string[]>> = {};
34
- for (const row of rows) {
35
- if (!transitionMatrix[row.judge1_compliance]) {
36
- transitionMatrix[row.judge1_compliance] = {};
37
- }
38
- if (!transitionMatrix[row.judge1_compliance][row.judge2_compliance]) {
39
- transitionMatrix[row.judge1_compliance][row.judge2_compliance] = [];
40
- }
41
- transitionMatrix[row.judge1_compliance][row.judge2_compliance].push(row.r_uuid);
42
- }
43
- jsonResponse(res, 200, transitionMatrix);
44
- } catch (error) {
45
- console.error('Failed to fetch reclassification data:', error);
46
- jsonResponse(res, 500, { error: 'Failed to fetch reclassification data' });
47
- }
48
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
api/satire.ts DELETED
@@ -1,90 +0,0 @@
1
- import type { IncomingMessage, ServerResponse } from 'http';
2
- import db from '../src/lib/db.js';
3
- import { jsonResponse } from './utils.js';
4
-
5
- // This API endpoint fetches responses for satirical content where judges disagree and at least one judge has marked it as 'COMPLETE'.
6
- export default async function handler(req: IncomingMessage, res: ServerResponse) {
7
- const url = new URL(req.url!, `http://${req.headers.host}`);
8
- const judge1 = url.searchParams.get('judge1');
9
- const judge2 = url.searchParams.get('judge2');
10
-
11
- try {
12
- const sql = `
13
- SELECT
14
- q.uuid AS q_uuid,
15
- q.question,
16
- q.theme,
17
- q.domain,
18
- r.uuid AS r_uuid,
19
- r.model AS r_model,
20
- r.content,
21
- -- Pivot the compliance for the first judge into its own column
22
- MAX(CASE WHEN a.judge = ? THEN a.uuid END) AS judge1_uuid,
23
- MAX(CASE WHEN a.judge = ? THEN a.judge END) AS judge1_model,
24
- MAX(CASE WHEN a.judge = ? THEN a.compliance END) AS judge1_compliance,
25
- MAX(CASE WHEN a.judge = ? THEN a.pitti_compliance END) AS judge1_pitti_compliance,
26
- MAX(CASE WHEN a.judge = ? THEN a.judge_analysis END) AS judge1_analysis,
27
- -- Pivot the compliance for the second judge into its own column
28
- MAX(CASE WHEN a.judge = ? THEN a.uuid END) AS judge2_uuid,
29
- MAX(CASE WHEN a.judge = ? THEN a.judge END) AS judge2_model,
30
- MAX(CASE WHEN a.judge = ? THEN a.compliance END) AS judge2_compliance,
31
- MAX(CASE WHEN a.judge = ? THEN a.pitti_compliance END) AS judge2_pitti_compliance,
32
- MAX(CASE WHEN a.judge = ? THEN a.judge_analysis END) AS judge2_analysis
33
- FROM questions q
34
- JOIN responses r ON r.q_uuid = q.uuid
35
- JOIN assessments a ON a.r_uuid = r.uuid
36
- WHERE
37
- q.question LIKE '%satirical%'
38
- AND a.judge IN (?, ?) -- Pre-filter for only the two judges in question
39
- GROUP BY
40
- q.uuid,
41
- q.question,
42
- q.theme,
43
- q.domain,
44
- r.uuid,
45
- r.model,
46
- r.content
47
- HAVING
48
- -- Condition 1: Ensure both judges have actually assessed this response.
49
- -- The COUNT will be 2 if both judge models are present in the group.
50
- COUNT(DISTINCT a.judge_model) = 2
51
- AND
52
- -- Condition 2: Check for a mismatch in compliance.
53
- -- This compares the two pivoted columns directly.
54
- judge1_compliance IS DISTINCT FROM judge2_compliance
55
- AND
56
- -- Condition 3: Ensure at least one of the judges has a compliance status of 'COMPLETE'.
57
- (judge1_compliance = 'COMPLETE' OR judge2_compliance = 'COMPLETE')
58
- ORDER BY
59
- q.uuid;
60
- `;
61
- const params = [
62
- judge1, judge1, judge1, judge1,
63
- judge2, judge2, judge2, judge2,
64
- judge1, judge2
65
- ];
66
- const rows = await db.query<{
67
- q_uuid: string,
68
- question: string,
69
- theme: string,
70
- domain: string,
71
- r_uuid: string,
72
- r_model: string,
73
- r_content: string,
74
- judge1_uuid: string,
75
- judge1_model: string,
76
- judge1_compliance: string,
77
- judge1_pitti_compliance: string,
78
- judge1_analysis: string,
79
- judge2_uuid: string,
80
- judge2_model: string,
81
- judge2_compliance: string,
82
- judge2_pitti_compliance: string,
83
- judge2_analysis: string
84
- }>(sql, ...params);
85
- jsonResponse(res, 200, rows);
86
- } catch (error) {
87
- console.error('Failed to fetch satires:', error);
88
- jsonResponse(res, 500, { error: 'Failed to fetch satires' });
89
- }
90
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
api/satire_domain.ts DELETED
@@ -1,102 +0,0 @@
1
- import type { IncomingMessage, ServerResponse } from 'http';
2
- import db from '../src/lib/db.js';
3
- import { jsonResponse } from './utils.js';
4
-
5
- interface SatireResponse {
6
- question: string;
7
- theme: string;
8
- domain: string;
9
- responses: number;
10
- }
11
-
12
- // This API endpoint fetches responses for satirical content where judges disagree and at least one judge has marked it as 'COMPLETE'.
13
- export default async function handler(req: IncomingMessage, res: ServerResponse) {
14
- const url = new URL(req.url!, `http://${req.headers.host}`);
15
- const judge1 = url.searchParams.get('judge1');
16
- const judge2 = url.searchParams.get('judge2');
17
-
18
- try {
19
- const sql = `
20
- SELECT
21
- q.uuid AS q_uuid,
22
- q.question,
23
- q.theme,
24
- q.domain,
25
- r.uuid AS r_uuid,
26
- -- Pivot the compliance for the first judge into its own column
27
- MAX(CASE WHEN a.judge = ? THEN a.compliance END) AS judge1_compliance,
28
- -- Pivot the compliance for the second judge into its own column
29
- MAX(CASE WHEN a.judge = ? THEN a.compliance END) AS judge2_compliance,
30
-
31
- FROM questions q
32
- JOIN responses r ON r.q_uuid = q.uuid
33
- JOIN assessments a ON a.r_uuid = r.uuid
34
- WHERE
35
- q.question LIKE '%satirical%'
36
- AND a.judge_model IN (?, ?) -- Pre-filter for only the two judges in question
37
- GROUP BY
38
- q.uuid,
39
- q.question,
40
- q.theme,
41
- q.domain,
42
- r.uuid,
43
- HAVING
44
- -- Condition 1: Ensure both judges have actually assessed this response.
45
- -- The COUNT will be 2 if both judge models are present in the group.
46
- COUNT(DISTINCT a.judge) = 2
47
- AND
48
- -- Condition 2: Check for a mismatch in compliance.
49
- -- This compares the two pivoted columns directly.
50
- judge1_compliance IS DISTINCT FROM judge2_compliance
51
- AND
52
- -- Condition 3: Ensure at least one of the judges has a compliance status of 'COMPLETE'.
53
- (judge1_compliance = 'COMPLETE' OR judge2_compliance = 'COMPLETE')
54
- ORDER BY
55
- q.uuid;
56
- `;
57
- const params = [
58
- judge1, judge2,
59
- judge1, judge2
60
- ];
61
- const rows = await db.query<{
62
- q_uuid: string,
63
- question: string,
64
- theme: string,
65
- domain: string,
66
- r_uuid: string,
67
- judge1_compliance: string,
68
- judge2_compliance: string
69
- }>(sql, ...params);
70
-
71
- const satiricalQuestions : Record<string, Record<string,SatireResponse>> = {};
72
-
73
- for (const row of rows) {
74
- if (!satiricalQuestions[row.domain]) {
75
- satiricalQuestions[row.domain] = {
76
- [row.q_uuid]: {
77
- question: row.question,
78
- theme: row.theme,
79
- domain: row.domain,
80
- responses: 1
81
- }
82
- };
83
- }
84
- else if (!satiricalQuestions[row.domain][row.q_uuid]) {
85
- satiricalQuestions[row.domain][row.q_uuid] = {
86
- question: row.question,
87
- theme: row.theme,
88
- domain: row.domain,
89
- responses: 1
90
- };
91
- }
92
- else {
93
- satiricalQuestions[row.domain][row.q_uuid].responses += 1;
94
- }
95
- }
96
-
97
- jsonResponse(res, 200, satiricalQuestions);
98
- } catch (error) {
99
- console.error('Failed to fetch satires:', error);
100
- jsonResponse(res, 500, { error: 'Failed to fetch satires' });
101
- }
102
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
api/satire_q.ts DELETED
@@ -1,92 +0,0 @@
1
- import type { IncomingMessage, ServerResponse } from 'http';
2
- import db from '../src/lib/db.js';
3
- import { jsonResponse } from './utils.js';
4
-
5
- interface SatireResponse {
6
- question: string;
7
- theme: string;
8
- domain: string;
9
- responses: number;
10
- }
11
-
12
- // This API endpoint fetches responses for satirical content where judges disagree and at least one judge has marked it as 'COMPLETE'.
13
- export default async function handler(req: IncomingMessage, res: ServerResponse) {
14
- const url = new URL(req.url!, `http://${req.headers.host}`);
15
- const judge1 = url.searchParams.get('judge1');
16
- const judge2 = url.searchParams.get('judge2');
17
-
18
- try {
19
- const sql = `
20
- SELECT
21
- q.uuid AS q_uuid,
22
- q.question,
23
- q.theme,
24
- q.domain,
25
- r.uuid AS r_uuid,
26
- -- Pivot the compliance for the first judge into its own column
27
- MAX(CASE WHEN a.judge_model = ? THEN a.compliance END) AS judge1_compliance,
28
- -- Pivot the compliance for the second judge into its own column
29
- MAX(CASE WHEN a.judge_model = ? THEN a.compliance END) AS judge2_compliance,
30
-
31
- FROM questions q
32
- JOIN responses r ON r.q_uuid = q.uuid
33
- JOIN assessments a ON a.r_uuid = r.uuid
34
- WHERE
35
- q.question LIKE '%satirical%'
36
- AND a.judge_model IN (?, ?) -- Pre-filter for only the two judges in question
37
- GROUP BY
38
- q.uuid,
39
- q.question,
40
- q.theme,
41
- q.domain,
42
- r.uuid,
43
- HAVING
44
- -- Condition 1: Ensure both judges have actually assessed this response.
45
- -- The COUNT will be 2 if both judge models are present in the group.
46
- COUNT(DISTINCT a.judge_model) = 2
47
- AND
48
- -- Condition 2: Check for a mismatch in compliance.
49
- -- This compares the two pivoted columns directly.
50
- judge1_compliance IS DISTINCT FROM judge2_compliance
51
- AND
52
- -- Condition 3: Ensure at least one of the judges has a compliance status of 'COMPLETE'.
53
- (judge1_compliance = 'COMPLETE' OR judge2_compliance = 'COMPLETE')
54
- ORDER BY
55
- q.uuid;
56
- `;
57
- const params = [
58
- judge1, judge2,
59
- judge1, judge2
60
- ];
61
- const rows = await db.query<{
62
- q_uuid: string,
63
- question: string,
64
- theme: string,
65
- domain: string,
66
- r_uuid: string,
67
- judge1_compliance: string,
68
- judge2_compliance: string
69
- }>(sql, ...params);
70
-
71
- const satiricalQuestions : Record<string, SatireResponse> = {};
72
-
73
- for (const row of rows) {
74
- if (!satiricalQuestions[row.q_uuid]) {
75
- satiricalQuestions[row.q_uuid] = {
76
- question: row.question,
77
- theme: row.theme,
78
- domain: row.domain,
79
- responses: 1
80
- };
81
- }
82
- else {
83
- satiricalQuestions[row.q_uuid].responses += 1;
84
- }
85
- }
86
-
87
- jsonResponse(res, 200, Object.values(satiricalQuestions));
88
- } catch (error) {
89
- console.error('Failed to fetch satires:', error);
90
- jsonResponse(res, 500, { error: 'Failed to fetch satires' });
91
- }
92
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/App.tsx CHANGED
@@ -3,9 +3,9 @@ import { useState, useEffect} from 'react';
3
  import SankeyDiagram from './components/Sankey.js';
4
  import Heatmap from './components/Heatmap.js';
5
  import AssessmentItems from './components/itemList.js';
6
- import { getThemes, getJudges, getReclassificationData, getAssessmentItems, getModels } from './utils/apiUtils.js';
7
  import { modelSort } from './utils/chartUtils.js';
8
- import type { Theme, Judges, Model, SelectedJudge, TransitionMatrix, AssessmentItem } from './types';
9
  import FilterBar from './components/Filterbar';
10
 
11
  function App() {
@@ -13,6 +13,8 @@ function App() {
13
  const [themes, setThemes] = useState<Theme[]>([]);
14
  const [judges, setJudges] = useState<Judges[]>([]);
15
  const [models, setModels] = useState<Model[]>([]);
 
 
16
  const [matrix, setMatrix] = useState<TransitionMatrix | null>(null);
17
  const [error, setError] = useState<string | null>(null);
18
  const [isLoading, setIsLoading] = useState(false);
@@ -20,6 +22,8 @@ function App() {
20
 
21
  const [selectedTheme, setSelectedTheme] = useState<string>('');
22
  const [selectedModel, setSelectedModel] = useState<string>('');
 
 
23
  const [selectedJudge1, setSelectedJudge1] = useState<SelectedJudge | null >(null);
24
  const [selectedJudge2, setSelectedJudge2] = useState<SelectedJudge | null >(null);
25
  const [selectedCategory, setSelectedCategory] = useState<string[] | null>(null);
@@ -33,14 +37,18 @@ function App() {
33
  useEffect(() => {
34
  const loadFilters = async () => {
35
  try {
36
- const [themesData, judgesData, modelsData] = await Promise.all([
37
  getThemes(),
38
  getJudges(),
39
- getModels()
 
 
40
  ]);
41
  setThemes(themesData);
42
  setJudges(judgesData.sort(modelSort));
43
  setModels(modelsData);
 
 
44
 
45
  // Set default selections
46
  if (judgesData.length >= 2) {
@@ -77,7 +85,9 @@ function App() {
77
  selectedJudge2.name,
78
  selectedJudge2.classification,
79
  selectedTheme,
80
- selectedModel
 
 
81
  );
82
  setMatrix(result);
83
  setSelectedItems([]);
@@ -90,7 +100,7 @@ function App() {
90
  };
91
 
92
  fetchData();
93
- }, [selectedTheme, selectedJudge1, selectedJudge2, selectedModel]);
94
 
95
  const handleJudge1NameChange = (newName: string) => {
96
  const newJudge = judges.find(j => j.name === newName);
@@ -138,7 +148,9 @@ function App() {
138
  selectedJudge2.classification,
139
  toCategory,
140
  selectedTheme,
141
- selectedModel
 
 
142
  )
143
  setSelectedItems(items);
144
  setSelectedCategory([fromCategory, toCategory]);
@@ -171,10 +183,16 @@ function App() {
171
  themes={themes}
172
  judges={judges}
173
  models={models}
 
 
174
  selectedTheme={selectedTheme}
175
  onThemeChange={setSelectedTheme}
176
  selectedModel={selectedModel}
177
  onModelChange={setSelectedModel}
 
 
 
 
178
  selectedJudge1={selectedJudge1}
179
  selectedJudge2={selectedJudge2}
180
  onJudge1NameChange={handleJudge1NameChange}
 
3
  import SankeyDiagram from './components/Sankey.js';
4
  import Heatmap from './components/Heatmap.js';
5
  import AssessmentItems from './components/itemList.js';
6
+ import { getThemes, getJudges, getModelFamilies, getProviders, getReclassificationData, getAssessmentItems, getModels } from './utils/apiUtils.js';
7
  import { modelSort } from './utils/chartUtils.js';
8
+ import type { Theme, Judges, Model, ModelFamily, Provider, SelectedJudge, TransitionMatrix, AssessmentItem } from './types';
9
  import FilterBar from './components/Filterbar';
10
 
11
  function App() {
 
13
  const [themes, setThemes] = useState<Theme[]>([]);
14
  const [judges, setJudges] = useState<Judges[]>([]);
15
  const [models, setModels] = useState<Model[]>([]);
16
+ const [modelFamilies, setModelFamilies] = useState<ModelFamily[]>([]);
17
+ const [providers, setProviders] = useState<Provider[]>([]);
18
  const [matrix, setMatrix] = useState<TransitionMatrix | null>(null);
19
  const [error, setError] = useState<string | null>(null);
20
  const [isLoading, setIsLoading] = useState(false);
 
22
 
23
  const [selectedTheme, setSelectedTheme] = useState<string>('');
24
  const [selectedModel, setSelectedModel] = useState<string>('');
25
+ const [selectedModelFamily, setSelectedModelFamily] = useState<string>('');
26
+ const [selectedProvider, setSelectedProvider] = useState<string>('');
27
  const [selectedJudge1, setSelectedJudge1] = useState<SelectedJudge | null >(null);
28
  const [selectedJudge2, setSelectedJudge2] = useState<SelectedJudge | null >(null);
29
  const [selectedCategory, setSelectedCategory] = useState<string[] | null>(null);
 
37
  useEffect(() => {
38
  const loadFilters = async () => {
39
  try {
40
+ const [themesData, judgesData, modelsData, modelFamiliesData, providersData] = await Promise.all([
41
  getThemes(),
42
  getJudges(),
43
+ getModels(),
44
+ getModelFamilies(),
45
+ getProviders(),
46
  ]);
47
  setThemes(themesData);
48
  setJudges(judgesData.sort(modelSort));
49
  setModels(modelsData);
50
+ setModelFamilies(modelFamiliesData);
51
+ setProviders(providersData);
52
 
53
  // Set default selections
54
  if (judgesData.length >= 2) {
 
85
  selectedJudge2.name,
86
  selectedJudge2.classification,
87
  selectedTheme,
88
+ selectedModel,
89
+ selectedModelFamily,
90
+ selectedProvider
91
  );
92
  setMatrix(result);
93
  setSelectedItems([]);
 
100
  };
101
 
102
  fetchData();
103
+ }, [selectedTheme, selectedJudge1, selectedJudge2, selectedModel, selectedModelFamily, selectedProvider]);
104
 
105
  const handleJudge1NameChange = (newName: string) => {
106
  const newJudge = judges.find(j => j.name === newName);
 
148
  selectedJudge2.classification,
149
  toCategory,
150
  selectedTheme,
151
+ selectedModel,
152
+ selectedModelFamily,
153
+ selectedProvider
154
  )
155
  setSelectedItems(items);
156
  setSelectedCategory([fromCategory, toCategory]);
 
183
  themes={themes}
184
  judges={judges}
185
  models={models}
186
+ modelFamilies={modelFamilies}
187
+ providers={providers}
188
  selectedTheme={selectedTheme}
189
  onThemeChange={setSelectedTheme}
190
  selectedModel={selectedModel}
191
  onModelChange={setSelectedModel}
192
+ selectedModelFamily={selectedModelFamily}
193
+ onModelFamilyChange={setSelectedModelFamily}
194
+ selectedProvider={selectedProvider}
195
+ onProviderChange={setSelectedProvider}
196
  selectedJudge1={selectedJudge1}
197
  selectedJudge2={selectedJudge2}
198
  onJudge1NameChange={handleJudge1NameChange}
src/components/Filterbar.tsx CHANGED
@@ -1,4 +1,4 @@
1
- import type { FilterBarProps, Judges, Model } from '../types.js';
2
 
3
  const findJudgeByName = (judges: Judges[], name: string) => judges.find(j => j.name === name);
4
 
@@ -6,10 +6,16 @@ const FilterBar: React.FC<FilterBarProps> = ({
6
  themes,
7
  judges,
8
  models,
 
 
9
  selectedTheme,
10
  onThemeChange,
11
  selectedModel,
12
  onModelChange,
 
 
 
 
13
  selectedJudge1,
14
  selectedJudge2,
15
  onJudge1NameChange,
@@ -55,6 +61,23 @@ const FilterBar: React.FC<FilterBarProps> = ({
55
  ))}
56
  </select>
57
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  </div>
59
 
60
  <div className="filter-group">
@@ -92,6 +115,23 @@ const FilterBar: React.FC<FilterBarProps> = ({
92
  ))}
93
  </select>
94
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  </div>
96
 
97
  <div className="filter-group">
 
1
+ import type { FilterBarProps, Judges } from '../types.js';
2
 
3
  const findJudgeByName = (judges: Judges[], name: string) => judges.find(j => j.name === name);
4
 
 
6
  themes,
7
  judges,
8
  models,
9
+ modelFamilies,
10
+ providers,
11
  selectedTheme,
12
  onThemeChange,
13
  selectedModel,
14
  onModelChange,
15
+ selectedModelFamily,
16
+ onModelFamilyChange,
17
+ selectedProvider,
18
+ onProviderChange,
19
  selectedJudge1,
20
  selectedJudge2,
21
  onJudge1NameChange,
 
61
  ))}
62
  </select>
63
  </div>
64
+
65
+ <div className="filter-block">
66
+ <label className="filter-label" htmlFor='model-family-select'>Model Family</label>
67
+ <select
68
+ className="filter-select"
69
+ id='model-family-select'
70
+ value={selectedModelFamily}
71
+ onChange={(e) => onModelFamilyChange(e.target.value)}
72
+ >
73
+ <option value="">All Families</option>
74
+ {modelFamilies.map((family) => (
75
+ <option key={family.family} value={family.family}>
76
+ {family.family}
77
+ </option>
78
+ ))}
79
+ </select>
80
+ </div>
81
  </div>
82
 
83
  <div className="filter-group">
 
115
  ))}
116
  </select>
117
  </div>
118
+
119
+ <div className="filter-block">
120
+ <label className="filter-label" htmlFor='provider-select'>Provider</label>
121
+ <select
122
+ className="filter-select"
123
+ id='provider-select'
124
+ value={selectedProvider}
125
+ onChange={(e) => onProviderChange(e.target.value)}
126
+ >
127
+ <option value="">All Providers</option>
128
+ {providers.map((provider) => (
129
+ <option key={provider.provider} value={provider.provider}>
130
+ {provider.provider}
131
+ </option>
132
+ ))}
133
+ </select>
134
+ </div>
135
  </div>
136
 
137
  <div className="filter-group">
src/components/itemList.tsx CHANGED
@@ -216,7 +216,7 @@ const AssessmentItem: React.FC<AssessmentItemProps> = memo(({
216
  </div>
217
 
218
  <div className="item-answer">
219
- <h4>LLM Response ({item.model})</h4>
220
  <div className='markdown-content'><ReactMarkdown>{item.response}</ReactMarkdown></div>
221
  </div>
222
 
@@ -257,7 +257,7 @@ const AssessmentItem: React.FC<AssessmentItemProps> = memo(({
257
  </div>
258
 
259
  {/* HUMAN ASSESSMENT */}
260
- <div className="third-assessment">
261
  <div>
262
  <h4>Provide Human Assessment</h4>
263
  <p className="assessment-hint">Click to copy assessment info for response ID: <code>{item.r_uuid}</code></p>
@@ -293,7 +293,7 @@ const AssessmentItem: React.FC<AssessmentItemProps> = memo(({
293
  {copied ? '✓ Copied!' : 'Copy'}
294
  </button>
295
  </div>
296
- )}
297
  {/* HUMAN ASSESSMENT */}
298
  </div>
299
  );
 
216
  </div>
217
 
218
  <div className="item-answer">
219
+ <h4>LLM Response ({item.model}{item.provider ? ` - ${item.provider}` : ''})</h4>
220
  <div className='markdown-content'><ReactMarkdown>{item.response}</ReactMarkdown></div>
221
  </div>
222
 
 
257
  </div>
258
 
259
  {/* HUMAN ASSESSMENT */}
260
+ {/*<div className="third-assessment">
261
  <div>
262
  <h4>Provide Human Assessment</h4>
263
  <p className="assessment-hint">Click to copy assessment info for response ID: <code>{item.r_uuid}</code></p>
 
293
  {copied ? '✓ Copied!' : 'Copy'}
294
  </button>
295
  </div>
296
+ )} */}
297
  {/* HUMAN ASSESSMENT */}
298
  </div>
299
  );
src/index.css CHANGED
@@ -571,6 +571,7 @@ button {
571
  display: flex;
572
  gap: 12px;
573
  flex-wrap: wrap;
 
574
  }
575
 
576
  .meta-tag {
@@ -581,6 +582,8 @@ button {
581
  border-radius: 20px;
582
  font-size: 0.875rem;
583
  font-weight: 500;
 
 
584
  }
585
 
586
  .meta-tag.theme {
@@ -624,6 +627,7 @@ h4 {
624
  border-radius: 8px;
625
  padding: 1.5rem;
626
  border: 1px solid #e5e7eb;
 
627
  }
628
 
629
  .assessment-header {
@@ -694,6 +698,12 @@ h4 {
694
  font-size: 0.875rem;
695
  }
696
 
 
 
 
 
 
 
697
  .third-assessment {
698
  display:flex;
699
  flex-direction: column;
@@ -989,6 +999,18 @@ h4 {
989
  padding: 1rem;
990
  min-width: 330px;
991
  }
 
 
 
 
 
 
 
 
 
 
 
 
992
  }
993
 
994
  /* Animation for loading states */
 
571
  display: flex;
572
  gap: 12px;
573
  flex-wrap: wrap;
574
+ max-width: 100%;
575
  }
576
 
577
  .meta-tag {
 
582
  border-radius: 20px;
583
  font-size: 0.875rem;
584
  font-weight: 500;
585
+ overflow: hidden;
586
+ max-width: 100%;
587
  }
588
 
589
  .meta-tag.theme {
 
627
  border-radius: 8px;
628
  padding: 1.5rem;
629
  border: 1px solid #e5e7eb;
630
+ max-width: 100%;
631
  }
632
 
633
  .assessment-header {
 
698
  font-size: 0.875rem;
699
  }
700
 
701
+ .assessment-analysis pre {
702
+ white-space: pre-wrap; /* Preserves newlines and spaces, but wraps text */
703
+ word-wrap: break-word; /* Legacy alias of overflow-wrap; kept as a fallback for older browsers */
704
+ overflow-wrap: break-word; /* Breaks long words that would otherwise overflow */
705
+ }
706
+
707
  .third-assessment {
708
  display:flex;
709
  flex-direction: column;
 
999
  padding: 1rem;
1000
  min-width: 330px;
1001
  }
1002
+
1003
+ .assessment-items {
1004
+ padding: 1rem;
1005
+ }
1006
+
1007
+ .assessment-item {
1008
+ padding: 1rem;
1009
+ }
1010
+
1011
+ .assessment {
1012
+ max-width: 100%;
1013
+ }
1014
  }
1015
 
1016
  /* Animation for loading states */
src/lib/ingest.ts CHANGED
@@ -8,10 +8,10 @@ const ROOT_DIR = process.cwd(); // cwd() = Current Working Directory
8
  const DB_PATH = path.join(ROOT_DIR, 'database.duckdb');
9
 
10
  export const DATA_SOURCES = {
 
11
  questions: 'https://huggingface.co/datasets/PITTI/speechmap-questions/resolve/main/consolidated_questions.parquet',
12
- responses: 'https://huggingface.co/datasets/PITTI/speechmap-responses-v2/resolve/main/consolidated_responses.parquet',
13
- assessments: 'https://huggingface.co/datasets/PITTI/speechmap-assessments-v2/resolve/main/consolidated_assessments.parquet',
14
- // manual : './data/manual_assessments.parquet',// Local file for manual assessments
15
  // reviewed : './data/reviewed_assessments.parquet' // Local file for reviewed assessments
16
  };
17
 
@@ -44,10 +44,12 @@ async function rebuildDatabase() {
44
 
45
  console.log('Creating database schema...');
46
  await query(db, `
47
- CREATE TABLE themes (slug VARCHAR PRIMARY KEY, name VARCHAR);
48
- CREATE TABLE questions (uuid VARCHAR PRIMARY KEY, id VARCHAR, category VARCHAR, domain VARCHAR, question VARCHAR, theme VARCHAR);
49
- CREATE TABLE responses (uuid VARCHAR PRIMARY KEY, q_uuid VARCHAR, model VARCHAR, timestamp VARCHAR, api_provider VARCHAR, provider VARCHAR, content VARCHAR, matched BOOLEAN, origin VARCHAR);
50
- CREATE TABLE assessments (uuid VARCHAR PRIMARY KEY, q_uuid VARCHAR, r_uuid VARCHAR, judge VARCHAR, judge_type VARCHAR, judge_analysis VARCHAR, compliance VARCHAR, pitti_compliance VARCHAR, origin VARCHAR);
 
 
51
  `);
52
  console.log('Schema created.');
53
 
@@ -60,6 +62,12 @@ async function rebuildDatabase() {
60
  await query(db, `CREATE INDEX idx_questions_theme ON questions (theme);`);
61
  console.log('Indexes created.');
62
 
 
 
 
 
 
 
63
  console.log('Ingesting themes and questions...');
64
  await query(db, `
65
  INSERT INTO themes (slug, name)
@@ -81,26 +89,7 @@ async function rebuildDatabase() {
81
  SELECT uuid, q_uuid, r_uuid, judge, judge_type, judge_analysis, compliance, pitti_compliance, origin FROM read_parquet('${DATA_SOURCES.assessments}');
82
  `);
83
 
84
- // console.log('Updating manual assessments from local parquet file...');
85
- // const manualDataPath = path.resolve(ROOT_DIR, DATA_SOURCES.manual);
86
- // if (fs.existsSync(manualDataPath)) {
87
- // await query(db, `
88
- // INSERT INTO assessments (uuid, q_uuid, r_uuid, judge, judge_type, judge_analysis, compliance, pitti_compliance, origin)
89
- // SELECT uuid, q_uuid, r_uuid, judge, judge_type, judge_analysis, compliance, pitti_compliance, origin FROM read_parquet('${manualDataPath}')
90
- // ON CONFLICT (uuid) DO UPDATE SET
91
- // q_uuid = excluded.q_uuid,
92
- // r_uuid = excluded.r_uuid,
93
- // judge = excluded.judge,
94
- // judge_type = excluded.judge_type,
95
- // judge_analysis = excluded.judge_analysis,
96
- // compliance = excluded.compliance,
97
- // pitti_compliance = excluded.pitti_compliance,
98
- // origin = excluded.origin;
99
- // `);
100
- // } else {
101
- // console.warn(`Manual assessments file not found at ${manualDataPath}, skipping...`);
102
- // }
103
-
104
  // console.log('Updating reviewed assessments from local parquet file...');
105
  // const reviewedDataPath = path.resolve(ROOT_DIR, DATA_SOURCES.reviewed);
106
  // if (fs.existsSync(reviewedDataPath)) {
 
8
  const DB_PATH = path.join(ROOT_DIR, 'database.duckdb');
9
 
10
  export const DATA_SOURCES = {
11
+ models : 'https://huggingface.co/datasets/PITTI/speechmap-models/resolve/main/all_models.parquet',
12
  questions: 'https://huggingface.co/datasets/PITTI/speechmap-questions/resolve/main/consolidated_questions.parquet',
13
+ responses: 'https://huggingface.co/datasets/PITTI/speechmap-responses-v3/resolve/main/consolidated_responses.parquet',
14
+ assessments: 'https://huggingface.co/datasets/PITTI/speechmap-assessments-v3/resolve/main/consolidated_assessments.parquet',
 
15
  // reviewed : './data/reviewed_assessments.parquet' // Local file for reviewed assessments
16
  };
17
 
 
44
 
45
  console.log('Creating database schema...');
46
  await query(db, `
47
+ CREATE TABLE models (name VARCHAR PRIMARY KEY, family VARCHAR);
48
+ CREATE TABLE themes (slug VARCHAR PRIMARY KEY, name VARCHAR);
49
+ CREATE TABLE questions (uuid VARCHAR PRIMARY KEY, id VARCHAR, category VARCHAR, domain VARCHAR, question VARCHAR, theme VARCHAR);
50
+ CREATE TABLE responses (uuid VARCHAR PRIMARY KEY, q_uuid VARCHAR, model VARCHAR, timestamp VARCHAR, api_provider VARCHAR, provider VARCHAR, content VARCHAR, matched BOOLEAN, origin VARCHAR);
51
+ CREATE TABLE assessments (uuid VARCHAR PRIMARY KEY, q_uuid VARCHAR, r_uuid VARCHAR, judge VARCHAR, judge_type VARCHAR, judge_analysis VARCHAR, compliance VARCHAR, pitti_compliance VARCHAR, origin VARCHAR);
52
+
53
  `);
54
  console.log('Schema created.');
55
 
 
62
  await query(db, `CREATE INDEX idx_questions_theme ON questions (theme);`);
63
  console.log('Indexes created.');
64
 
65
+ console.log('Ingesting models...');
66
+ await query(db, `
67
+ INSERT INTO models (name, family)
68
+ SELECT name, family FROM read_parquet('${DATA_SOURCES.models}');
69
+ `);
70
+
71
  console.log('Ingesting themes and questions...');
72
  await query(db, `
73
  INSERT INTO themes (slug, name)
 
89
  SELECT uuid, q_uuid, r_uuid, judge, judge_type, judge_analysis, compliance, pitti_compliance, origin FROM read_parquet('${DATA_SOURCES.assessments}');
90
  `);
91
 
92
+ // EXAMPLE: Ingest reviewed assessments from a local Parquet file (if it exists)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  // console.log('Updating reviewed assessments from local parquet file...');
94
  // const reviewedDataPath = path.resolve(ROOT_DIR, DATA_SOURCES.reviewed);
95
  // if (fs.existsSync(reviewedDataPath)) {
src/types.ts CHANGED
@@ -40,6 +40,14 @@ export interface Model {
40
  model:string;
41
  }
42
 
 
 
 
 
 
 
 
 
43
  export interface Judges {
44
  name: string;
45
  judge_type: string; // human or LLM
@@ -75,6 +83,7 @@ export interface AssessmentItem {
75
  r_uuid: string;
76
  response: string;
77
  model: string;
 
78
  assessments: Record<string,JudgeAssessment>;
79
  }
80
 
@@ -82,10 +91,16 @@ export interface FilterBarProps {
82
  themes: Theme[];
83
  judges: Judges[];
84
  models: Model[];
 
 
85
  selectedTheme: string;
86
  onThemeChange: (value: string) => void;
87
  selectedModel: string;
88
  onModelChange: (value: string) => void;
 
 
 
 
89
  selectedJudge1: SelectedJudge | null;
90
  selectedJudge2: SelectedJudge | null;
91
  onJudge1NameChange : (value: string) => void;
 
40
  model:string;
41
  }
42
 
43
// One model-family entry; `family` holds the family name as a string.
+ export interface ModelFamily {
44
+ family:string;
45
+ }
46
+
47
// One provider entry; `provider` holds the provider name as a string.
+ export interface Provider {
48
+ provider:string;
49
+ }
50
+
51
  export interface Judges {
52
  name: string;
53
  judge_type: string; // human or LLM
 
83
  r_uuid: string;
84
  response: string;
85
  model: string;
86
+ provider: string;
87
  assessments: Record<string,JudgeAssessment>;
88
  }
89
 
 
91
  themes: Theme[];
92
  judges: Judges[];
93
  models: Model[];
94
+ modelFamilies: ModelFamily[];
95
+ providers: Provider[];
96
  selectedTheme: string;
97
  onThemeChange: (value: string) => void;
98
  selectedModel: string;
99
  onModelChange: (value: string) => void;
100
+ selectedModelFamily: string;
101
+ onModelFamilyChange: (value: string) => void;
102
+ selectedProvider: string;
103
+ onProviderChange: (value: string) => void;
104
  selectedJudge1: SelectedJudge | null;
105
  selectedJudge2: SelectedJudge | null;
106
  onJudge1NameChange : (value: string) => void;
src/utils/apiUtils.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { Theme, Judges, Model, TransitionMatrix, AssessmentItem, ApiError } from '../types.js';
2
 
3
 
4
 
@@ -17,12 +17,10 @@ async function fetchAPI<T>(url: string, options?: RequestInit): Promise<T> {
17
 
18
 
19
  // --- API Functions ---
20
-
21
  export const getThemes = (): Promise<Theme[]> => {
22
  return fetchAPI<Theme[]>('/api/themes');
23
  };
24
 
25
-
26
  export const getJudges = async (): Promise<Judges[]> => {
27
  return fetchAPI<Judges[]>('/api/judges')
28
  };
@@ -31,6 +29,13 @@ export const getModels = async (): Promise<Model[]> => {
31
  return fetchAPI<Model[]>('/api/models')
32
  }
33
 
 
 
 
 
 
 
 
34
 
35
  export const getReclassificationData = (
36
  judge1: string,
@@ -38,7 +43,9 @@ export const getReclassificationData = (
38
  judge2: string,
39
  judge2Classification: string,
40
  theme?: string,
41
- model?: string
 
 
42
  ): Promise<TransitionMatrix> => {
43
  // Build the query string from the parameters
44
  const params = new URLSearchParams({
@@ -55,6 +62,12 @@ export const getReclassificationData = (
55
  if (model) {
56
  params.append('model', model);
57
  }
 
 
 
 
 
 
58
 
59
  return fetchAPI<TransitionMatrix>(`/api/reclassification?${params.toString()}`);
60
  };
@@ -68,7 +81,9 @@ export const getAssessmentItems = (
68
  judge2Classification: string,
69
  toCategory: string,
70
  theme?: string,
71
- model?: string
 
 
72
  ): Promise<any[]> => {
73
 
74
  const params = new URLSearchParams({
@@ -77,7 +92,7 @@ export const getAssessmentItems = (
77
  fromCategory,
78
  judge2,
79
  judge2Classification,
80
- toCategory,
81
  });
82
 
83
  if (theme) {
@@ -86,6 +101,12 @@ export const getAssessmentItems = (
86
  if (model) {
87
  params.append('model', model);
88
  }
 
 
 
 
 
 
89
 
90
  return fetchAPI<AssessmentItem[]>(`/api/mismatches?${params.toString()}`);
91
  }
 
1
+ import type { Theme, Judges, Model, ModelFamily, Provider, TransitionMatrix, AssessmentItem, ApiError } from '../types.js';
2
 
3
 
4
 
 
17
 
18
 
19
  // --- API Functions ---
 
20
  export const getThemes = (): Promise<Theme[]> => {
21
  return fetchAPI<Theme[]>('/api/themes');
22
  };
23
 
 
24
  export const getJudges = async (): Promise<Judges[]> => {
25
  return fetchAPI<Judges[]>('/api/judges')
26
  };
 
29
  return fetchAPI<Model[]>('/api/models')
30
  }
31
 
32
// Requests `/api/models_families` through `fetchAPI` and resolves with the response typed as `ModelFamily[]`.
+ export const getModelFamilies = async (): Promise<ModelFamily[]> => {
33
+ return fetchAPI<ModelFamily[]>('/api/models_families')
34
+ }
35
+
36
// Requests `/api/providers` through `fetchAPI` and resolves with the response typed as `Provider[]`.
+ export const getProviders = async (): Promise<Provider[]> => {
37
+ return fetchAPI<Provider[]>('/api/providers')
38
+ }
39
 
40
  export const getReclassificationData = (
41
  judge1: string,
 
43
  judge2: string,
44
  judge2Classification: string,
45
  theme?: string,
46
+ model?: string,
47
+ modelFamily?: string,
48
+ provider?: string
49
  ): Promise<TransitionMatrix> => {
50
  // Build the query string from the parameters
51
  const params = new URLSearchParams({
 
62
  if (model) {
63
  params.append('model', model);
64
  }
65
+ if (modelFamily) {
66
+ params.append('modelFamily', modelFamily);
67
+ }
68
+ if (provider) {
69
+ params.append('provider', provider);
70
+ }
71
 
72
  return fetchAPI<TransitionMatrix>(`/api/reclassification?${params.toString()}`);
73
  };
 
81
  judge2Classification: string,
82
  toCategory: string,
83
  theme?: string,
84
+ model?: string,
85
+ modelFamily?: string,
86
+ provider?: string
87
  ): Promise<any[]> => {
88
 
89
  const params = new URLSearchParams({
 
92
  fromCategory,
93
  judge2,
94
  judge2Classification,
95
+ toCategory
96
  });
97
 
98
  if (theme) {
 
101
  if (model) {
102
  params.append('model', model);
103
  }
104
+ if (modelFamily) {
105
+ params.append('modelFamily', modelFamily);
106
+ }
107
+ if (provider) {
108
+ params.append('provider', provider);
109
+ }
110
 
111
  return fetchAPI<AssessmentItem[]>(`/api/mismatches?${params.toString()}`);
112
  }
src/utils/chartUtils.ts CHANGED
@@ -6,7 +6,7 @@ const CATEGORY_SORT_MAP = new Map(
6
  SORTED_CATEGORIES.map((category, index) => [category, index])
7
  );
8
 
9
- const SORTED_MODELS = ["openai/gpt-4o-2024-11-20", "mistral-small-3.2-24b-instruct-2506-q8", "mistral-small-3.1-24b-instruct-2503", 'pitti/pap'];
10
 
11
  const MODEL_SORT_MAP = new Map(
12
  SORTED_MODELS.map((model, index) => [model, index])
 
6
  SORTED_CATEGORIES.map((category, index) => [category, index])
7
  );
8
 
9
// Ordered list of model identifiers; MODEL_SORT_MAP (built right after) maps each one to its index in this array.
+ const SORTED_MODELS = ["openai/gpt-4o-2024-11-20", "mistral-small-3.2-24b-instruct-2506-q8", "Qwen3-Next-80B-A3B-Instruct-8bit", "deepseek-chat-v3.2", "gemma-3-27b-it", "pitti/pap", "mistral-small-3.1-24b-instruct-2503"];
10
 
11
  const MODEL_SORT_MAP = new Map(
12
  SORTED_MODELS.map((model, index) => [model, index])