arena-learning / studyArena /lib /generation /action-parser.ts
NitishStark's picture
Upload folder using huggingface_hub
c20f20c verified
/**
* Action Parser - converts structured JSON Array output to Action[]
*
* Bridges the stateless-generate parser (used for online streaming) with the
* offline generation pipeline, producing typed Action objects that preserve
* the original interleaving order from the LLM output.
*
* For complete (non-streaming) responses, uses JSON.parse first, then retries
* with jsonrepair, and finally falls back to partial-json for robustness.
*/
import type { Action, ActionType } from '@/lib/types/action';
import { SLIDE_ONLY_ACTIONS } from '@/lib/types/action';
import { nanoid } from 'nanoid';
import { parse as parsePartialJson, Allow } from 'partial-json';
import { jsonrepair } from 'jsonrepair';
import { createLogger } from '@/lib/logger';
// Module-level logger used by the parser below; created with the 'ActionParser' label.
const log = createLogger('ActionParser');
/**
 * Remove a wrapping Markdown code fence from a response string.
 *
 * An opening fence (three backticks, optionally followed by `json`, matched
 * case-insensitively) is removed only when it is at the very start of the
 * text. A closing fence is removed only when it is at the very end of the
 * text, together with the whitespace around it. Text without such fences is
 * returned unchanged.
 */
function stripCodeFences(text: string): string {
  const leadingFence = /^```(?:json)?\s*\n?/i;
  const trailingFence = /\n?\s*```\s*$/i;

  const withoutOpening = text.replace(leadingFence, '');
  return withoutOpening.replace(trailingFence, '');
}
/**
 * Parse a complete LLM response in JSON Array format into an ordered Action[] array.
 *
 * Expected format (new):
 *   [{"type":"action","name":"spotlight","params":{"elementId":"..."}},
 *    {"type":"text","content":"speech content"},...]
 *
 * Also supports legacy format:
 *   [{"type":"action","tool_name":"spotlight","parameters":{"elementId":"..."}},...]
 *
 * Text items become `speech` actions; action items are converted to actions
 * whose `type` is the item's name and whose remaining fields come from its
 * params. The original interleaving order is preserved.
 *
 * Malformed input never throws: items that are not objects, text items whose
 * content is not a non-blank string, and action items without a usable name
 * are skipped. Params can never overwrite the resulting action's `id` or `type`.
 *
 * @param response - Raw LLM response text, optionally wrapped in a Markdown code fence.
 * @param sceneType - When given and not `'slide'`, actions listed in
 *   `SLIDE_ONLY_ACTIONS` are removed from the result.
 * @param allowedActions - When non-empty, only `speech` actions and actions whose
 *   type is in this list are kept. Applied in addition to the scene-type filter.
 * @returns The parsed actions in response order, or `[]` when no JSON array
 *   could be extracted or parsed.
 */
export function parseActionsFromStructuredOutput(
  response: string,
  sceneType?: string,
  allowedActions?: string[],
): Action[] {
  // Step 1: Strip markdown code fences if present
  const cleaned = stripCodeFences(response.trim());

  // Step 2: Find the JSON array range
  const startIdx = cleaned.indexOf('[');
  if (startIdx === -1) {
    log.warn('No JSON array found in response');
    return [];
  }
  const endIdx = cleaned.lastIndexOf(']');
  // No closing bracket after the opening one means an unclosed array —
  // pass the tail through and let the lenient parsers handle it.
  const jsonStr =
    endIdx > startIdx ? cleaned.slice(startIdx, endIdx + 1) : cleaned.slice(startIdx);

  // Step 3: Parse — try JSON.parse first, then jsonrepair, fallback to partial-json
  let parsed: unknown;
  try {
    parsed = JSON.parse(jsonStr);
  } catch {
    // Try jsonrepair to fix malformed JSON (e.g. unescaped quotes in Chinese text)
    try {
      parsed = JSON.parse(jsonrepair(jsonStr));
      log.info('Recovered malformed JSON via jsonrepair');
    } catch {
      try {
        parsed = parsePartialJson(
          jsonStr,
          Allow.ARR | Allow.OBJ | Allow.STR | Allow.NUM | Allow.BOOL | Allow.NULL,
        );
      } catch (e) {
        log.warn('Failed to parse JSON array:', e instanceof Error ? e.message : String(e));
        return [];
      }
    }
  }

  if (!Array.isArray(parsed)) {
    log.warn('Parsed result is not an array');
    return [];
  }
  const items: unknown[] = parsed;

  // Step 4: Convert items to Action[]
  const actions: Action[] = [];
  for (const item of items) {
    if (!item || typeof item !== 'object' || !('type' in item)) continue;
    const typedItem = item as Record<string, unknown>;

    if (typedItem.type === 'text') {
      // Only string content can be spoken; anything else (number, object, ...)
      // is treated as empty instead of crashing on `.trim()`.
      const text = typeof typedItem.content === 'string' ? typedItem.content.trim() : '';
      if (text) {
        actions.push({
          id: `action_${nanoid(8)}`,
          type: 'speech',
          text,
        });
      }
      continue;
    }

    if (typedItem.type !== 'action') continue;

    // Support both new format (name/params) and legacy format (tool_name/parameters)
    const actionName = firstNonEmptyString(typedItem.name, typedItem.tool_name);
    if (actionName === undefined) {
      log.warn('Invalid action item, skipping:', JSON.stringify(typedItem).slice(0, 100));
      continue;
    }

    let actionParams: Record<string, unknown> = {};
    if (isPlainRecord(typedItem.params)) {
      actionParams = typedItem.params;
    } else if (isPlainRecord(typedItem.parameters)) {
      actionParams = typedItem.parameters;
    }

    const actionId =
      firstNonEmptyString(typedItem.action_id, typedItem.tool_id) ?? `action_${nanoid(8)}`;

    // Spread params first so model-supplied `id` / `type` keys cannot
    // override the action's identity.
    actions.push({
      ...actionParams,
      id: actionId,
      type: actionName as Action['type'],
    } as Action);
  }

  // Step 5: Post-processing — discussion must be the last action, and at most one.
  // Everything after the first discussion is dropped.
  const discussionIdx = actions.findIndex((a) => a.type === 'discussion');
  if (discussionIdx !== -1 && discussionIdx < actions.length - 1) {
    actions.splice(discussionIdx + 1);
  }

  let result = actions;

  // Step 6: Filter out slide-only actions for non-slide scenes (defense in depth)
  if (sceneType && sceneType !== 'slide') {
    const kept = result.filter((a) => !SLIDE_ONLY_ACTIONS.includes(a.type as ActionType));
    if (kept.length < result.length) {
      log.info(
        `Stripped ${result.length - kept.length} slide-only action(s) from ${sceneType} scene`,
      );
    }
    result = kept;
  }

  // Step 7: Filter by allowedActions whitelist (defense in depth for role-based isolation)
  // Catches hallucinated actions not in the agent's permitted set, e.g. a student agent
  // mimicking spotlight/laser after seeing teacher actions in chat history.
  // This runs after (not instead of) the scene-type filter so both restrictions apply.
  if (allowedActions && allowedActions.length > 0) {
    const kept = result.filter((a) => a.type === 'speech' || allowedActions.includes(a.type));
    if (kept.length < result.length) {
      log.info(
        `Stripped ${result.length - kept.length} disallowed action(s) by allowedActions whitelist`,
      );
    }
    result = kept;
  }

  return result;
}

/** Return the first candidate that is a non-empty string, or `undefined` if none is. */
function firstNonEmptyString(...candidates: unknown[]): string | undefined {
  for (const candidate of candidates) {
    if (typeof candidate === 'string' && candidate.length > 0) return candidate;
  }
  return undefined;
}

/** True when `value` is a non-null, non-array object (safe to spread as params). */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}