Spaces:
No application file
No application file
File size: 5,470 Bytes
c20f20c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 | /**
* Action Parser - converts structured JSON Array output to Action[]
*
* Bridges the stateless-generate parser (used for online streaming) with the
* offline generation pipeline, producing typed Action objects that preserve
* the original interleaving order from the LLM output.
*
* For complete (non-streaming) responses, uses JSON.parse with partial-json
* fallback for robustness.
*/
import type { Action, ActionType } from '@/lib/types/action';
import { SLIDE_ONLY_ACTIONS } from '@/lib/types/action';
import { nanoid } from 'nanoid';
import { parse as parsePartialJson, Allow } from 'partial-json';
import { jsonrepair } from 'jsonrepair';
import { createLogger } from '@/lib/logger';
const log = createLogger('ActionParser');
/**
 * Remove a wrapping markdown code fence from a response string.
 *
 * A leading fence (three backticks, optionally followed by `json`, matched
 * case-insensitively) is removed from the very start of the text, and a
 * trailing fence is removed from the very end. Whitespace adjacent to either
 * fence is removed along with it. Text without fences is returned unchanged.
 *
 * @param text - Raw response text, expected to be already trimmed by the caller.
 * @returns The text with the leading and trailing fence markers stripped.
 */
function stripCodeFences(text: string): string {
  const openingFence = /^```(?:json)?\s*\n?/i;
  const closingFence = /\n?\s*```\s*$/i;

  // Strip the opening marker first, then the closing one; each pattern is
  // anchored to its own end of the string, so they cannot interfere.
  const withoutOpening = text.replace(openingFence, '');
  return withoutOpening.replace(closingFence, '');
}
/**
 * Parse a complete LLM response in JSON Array format into an ordered Action[] array.
 *
 * Expected format (new):
 *   [{"type":"action","name":"spotlight","params":{"elementId":"..."}},
 *    {"type":"text","content":"speech content"},...]
 *
 * Also supports legacy format:
 *   [{"type":"action","tool_name":"spotlight","parameters":{"elementId":"..."}},...]
 *
 * Text items become `speech` actions; action items are converted to actions
 * whose `type` is the item's name and whose params are flattened onto the
 * action object. The original interleaving order is preserved.
 *
 * Malformed items are skipped rather than aborting the whole parse:
 * - text items whose `content` is not a non-empty string are dropped;
 * - action items without a string `name` / `tool_name` are dropped with a warning;
 * - `params` / `parameters` that are not a plain object are treated as empty.
 *
 * Post-processing, applied in this order:
 * 1. everything after the first `discussion` action is discarded;
 * 2. for non-`slide` scenes, actions listed in `SLIDE_ONLY_ACTIONS` are removed;
 * 3. if `allowedActions` is non-empty, every non-`speech` action whose type is
 *    not in the list is removed. This runs for every scene type.
 *
 * @param response - Raw LLM output; may be wrapped in markdown code fences and
 *   may contain text before or after the JSON array.
 * @param sceneType - Scene the actions are destined for. Any truthy value other
 *   than `'slide'` enables slide-only filtering.
 * @param allowedActions - Optional whitelist of action type names. `speech` is
 *   always allowed.
 * @returns The parsed actions, or an empty array if no array could be parsed.
 */
export function parseActionsFromStructuredOutput(
  response: string,
  sceneType?: string,
  allowedActions?: string[],
): Action[] {
  // Step 1: Strip markdown code fences if present
  const cleaned = stripCodeFences(response.trim());

  // Step 2: Find the JSON array range
  const startIdx = cleaned.indexOf('[');
  if (startIdx === -1) {
    log.warn('No JSON array found in response');
    return [];
  }
  const endIdx = cleaned.lastIndexOf(']');
  const jsonStr =
    endIdx > startIdx
      ? cleaned.slice(startIdx, endIdx + 1)
      : cleaned.slice(startIdx); // unclosed array — let partial-json handle it

  // Step 3: Parse — try JSON.parse first, then jsonrepair, fallback to partial-json
  let parsed: unknown;
  try {
    parsed = JSON.parse(jsonStr);
  } catch {
    // Try jsonrepair to fix malformed JSON (e.g. unescaped quotes in Chinese text)
    try {
      parsed = JSON.parse(jsonrepair(jsonStr));
      log.info('Recovered malformed JSON via jsonrepair');
    } catch {
      try {
        parsed = parsePartialJson(
          jsonStr,
          Allow.ARR | Allow.OBJ | Allow.STR | Allow.NUM | Allow.BOOL | Allow.NULL,
        );
      } catch (e) {
        // Thrown values are not guaranteed to be Error instances.
        log.warn('Failed to parse JSON array:', e instanceof Error ? e.message : String(e));
        return [];
      }
    }
  }
  if (!Array.isArray(parsed)) {
    log.warn('Parsed result is not an array');
    return [];
  }
  const items: unknown[] = parsed;

  // Step 4: Convert items to Action[]
  const actions: Action[] = [];
  for (const item of items) {
    if (!item || typeof item !== 'object' || !('type' in item)) continue;
    const typedItem = item as Record<string, unknown>;

    if (typedItem.type === 'text') {
      // Guard against non-string content: calling .trim() on a number or
      // object would throw and discard every action in the response.
      const text = typeof typedItem.content === 'string' ? typedItem.content.trim() : '';
      if (text) {
        actions.push({
          id: `action_${nanoid(8)}`,
          type: 'speech',
          text,
        });
      }
      continue;
    }

    if (typedItem.type !== 'action') continue;

    // Support both new format (name/params) and legacy format (tool_name/parameters)
    const actionName = [typedItem.name, typedItem.tool_name].find(
      (candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
    );
    if (actionName === undefined) {
      log.warn('Invalid action item, skipping:', JSON.stringify(typedItem).slice(0, 100));
      continue;
    }

    // Only a plain object can be flattened onto the action; spreading a string
    // or array would add meaningless index keys.
    const rawParams = typedItem.params || typedItem.parameters;
    const actionParams: Record<string, unknown> =
      typeof rawParams === 'object' && rawParams !== null && !Array.isArray(rawParams)
        ? (rawParams as Record<string, unknown>)
        : {};

    const rawId = typedItem.action_id || typedItem.tool_id;
    let id: string;
    if (typeof rawId === 'string') {
      id = rawId;
    } else if (typeof rawId === 'number') {
      id = String(rawId);
    } else {
      id = `action_${nanoid(8)}`;
    }

    // Spread params first so a stray `id` or `type` key inside params cannot
    // overwrite the action's identity or type.
    actions.push({
      ...actionParams,
      id,
      type: actionName as Action['type'],
    } as Action);
  }

  // Step 5: Post-processing — discussion must be the last action, and at most one
  const discussionIdx = actions.findIndex((a) => a.type === 'discussion');
  if (discussionIdx !== -1 && discussionIdx < actions.length - 1) {
    actions.splice(discussionIdx + 1);
  }

  let result = actions;

  // Step 6: Filter out slide-only actions for non-slide scenes (defense in depth)
  if (sceneType && sceneType !== 'slide') {
    const before = result.length;
    const filtered = result.filter((a) => !SLIDE_ONLY_ACTIONS.includes(a.type as ActionType));
    if (filtered.length < before) {
      log.info(`Stripped ${before - filtered.length} slide-only action(s) from ${sceneType} scene`);
    }
    // Do not return here: the whitelist below must also apply to non-slide scenes.
    result = filtered;
  }

  // Step 7: Filter by allowedActions whitelist (defense in depth for role-based isolation)
  // Catches hallucinated actions not in the agent's permitted set, e.g. a student agent
  // mimicking spotlight/laser after seeing teacher actions in chat history.
  if (allowedActions && allowedActions.length > 0) {
    const before = result.length;
    const filtered = result.filter((a) => a.type === 'speech' || allowedActions.includes(a.type));
    if (filtered.length < before) {
      log.info(
        `Stripped ${before - filtered.length} disallowed action(s) by allowedActions whitelist`,
      );
    }
    result = filtered;
  }

  return result;
}
|