File size: 5,470 Bytes
c20f20c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/**
 * Action Parser - converts structured JSON Array output to Action[]
 *
 * Bridges the stateless-generate parser (used for online streaming) with the
 * offline generation pipeline, producing typed Action objects that preserve
 * the original interleaving order from the LLM output.
 *
 * For complete (non-streaming) responses, tries JSON.parse first, then jsonrepair,
 * and finally partial-json as successive fallbacks for robustness.
 */

import type { Action, ActionType } from '@/lib/types/action';
import { SLIDE_ONLY_ACTIONS } from '@/lib/types/action';
import { nanoid } from 'nanoid';
import { parse as parsePartialJson, Allow } from 'partial-json';
import { jsonrepair } from 'jsonrepair';
import { createLogger } from '@/lib/logger';
// Logger shared by everything in this module, created under the name 'ActionParser'.
const log = createLogger('ActionParser');

/**
 * Remove a wrapping markdown code fence from a response string.
 *
 * A leading fence (three backticks, optionally followed by `json`, matched
 * case-insensitively) is dropped only when it sits at the very start of the
 * text; a trailing fence is dropped only when it sits at the very end.
 * Whitespace adjacent to either fence is removed along with it. Text without
 * fences is returned unchanged.
 */
function stripCodeFences(text: string): string {
  const openingFence = /^```(?:json)?\s*\n?/i;
  const closingFence = /\n?\s*```\s*$/i;

  const withoutOpening = text.replace(openingFence, '');
  return withoutOpening.replace(closingFence, '');
}

/** Returns the first candidate that is a non-empty string, or undefined when there is none. */
function firstNonEmptyString(...candidates: unknown[]): string | undefined {
  return candidates.find(
    (candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
  );
}

/** True for non-null, non-array objects, i.e. values that are safe to read keys from and spread. */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Parse a complete LLM response in JSON Array format into an ordered Action[] array.
 *
 * Expected format (new):
 * [{"type":"action","name":"spotlight","params":{"elementId":"..."}},
 *  {"type":"text","content":"speech content"},...]
 *
 * Also supports legacy format:
 * [{"type":"action","tool_name":"spotlight","parameters":{"elementId":"..."}},...]
 *
 * Text items become `speech` actions; action items are converted to their
 * respective action types (spotlight, discussion, etc.).
 * The original interleaving order is preserved.
 *
 * Malformed items never throw: text items whose `content` is not a non-blank
 * string and action items without a string name are skipped. Parameters
 * supplied by the model cannot override the resulting action's `id` or `type`.
 *
 * @param response - Raw LLM output; may be wrapped in markdown code fences and
 *   may contain text before or after the JSON array.
 * @param sceneType - When given and not `'slide'`, actions listed in
 *   SLIDE_ONLY_ACTIONS are removed.
 * @param allowedActions - When non-empty, only `speech` actions and actions
 *   whose type is in this list are kept. Applied to every scene type.
 * @returns The parsed actions in their original order, truncated after the
 *   first `discussion` action. Empty when no array could be parsed.
 */
export function parseActionsFromStructuredOutput(
  response: string,
  sceneType?: string,
  allowedActions?: string[],
): Action[] {
  // Step 1: Strip markdown code fences if present
  const cleaned = stripCodeFences(response.trim());

  // Step 2: Find the JSON array range
  const startIdx = cleaned.indexOf('[');
  const endIdx = cleaned.lastIndexOf(']');

  if (startIdx === -1) {
    log.warn('No JSON array found in response');
    return [];
  }

  // An unclosed array (no ']' after '[') is passed through to the end of the
  // text so the lenient parsers below can try to recover it.
  const jsonStr = endIdx > startIdx ? cleaned.slice(startIdx, endIdx + 1) : cleaned.slice(startIdx);

  // Step 3: Parse — try JSON.parse first, then jsonrepair, fallback to partial-json
  let items: unknown;
  try {
    items = JSON.parse(jsonStr);
  } catch {
    // Try jsonrepair to fix malformed JSON (e.g. unescaped quotes in Chinese text)
    try {
      items = JSON.parse(jsonrepair(jsonStr));
      log.info('Recovered malformed JSON via jsonrepair');
    } catch {
      try {
        items = parsePartialJson(
          jsonStr,
          Allow.ARR | Allow.OBJ | Allow.STR | Allow.NUM | Allow.BOOL | Allow.NULL,
        );
      } catch (e) {
        log.warn('Failed to parse JSON array:', e instanceof Error ? e.message : String(e));
        return [];
      }
    }
  }

  if (!Array.isArray(items)) {
    log.warn('Parsed result is not an array');
    return [];
  }

  // Step 4: Convert items to Action[]
  const actions: Action[] = [];

  for (const item of items) {
    if (!isPlainRecord(item) || !('type' in item)) continue;
    const typedItem = item;

    if (typedItem.type === 'text') {
      // Non-string content (number, object, ...) is treated as empty rather than
      // crashing on `.trim()`.
      const text = typeof typedItem.content === 'string' ? typedItem.content.trim() : '';
      if (text) {
        actions.push({
          id: `action_${nanoid(8)}`,
          type: 'speech',
          text,
        });
      }
    } else if (typedItem.type === 'action') {
      // Support both new format (name/params) and legacy format (tool_name/parameters)
      const actionName = firstNonEmptyString(typedItem.name, typedItem.tool_name);
      if (actionName === undefined) {
        // Without a name there is no action type to dispatch on.
        log.warn('Invalid action item, skipping:', JSON.stringify(typedItem).slice(0, 100));
        continue;
      }

      // Only object-shaped params are spread; strings or arrays would otherwise
      // leak index keys ("0", "1", ...) onto the action.
      const actionParams =
        [typedItem.params, typedItem.parameters].find(isPlainRecord) ?? {};

      actions.push({
        ...actionParams,
        // id and type come last so model-supplied params cannot overwrite them.
        id: firstNonEmptyString(typedItem.action_id, typedItem.tool_id) ?? `action_${nanoid(8)}`,
        type: actionName as Action['type'],
      } as Action);
    }
  }

  // Step 5: Post-processing — discussion must be the last action, and at most one
  const discussionIdx = actions.findIndex((a) => a.type === 'discussion');
  if (discussionIdx !== -1 && discussionIdx < actions.length - 1) {
    actions.splice(discussionIdx + 1);
  }

  let result = actions;

  // Step 6: Filter out slide-only actions for non-slide scenes (defense in depth)
  if (sceneType && sceneType !== 'slide') {
    const kept = result.filter((a) => !SLIDE_ONLY_ACTIONS.includes(a.type as ActionType));
    if (kept.length < result.length) {
      log.info(`Stripped ${result.length - kept.length} slide-only action(s) from ${sceneType} scene`);
    }
    result = kept;
  }

  // Step 7: Filter by allowedActions whitelist (defense in depth for role-based isolation)
  // Catches hallucinated actions not in the agent's permitted set, e.g. a student agent
  // mimicking spotlight/laser after seeing teacher actions in chat history.
  // This runs for every scene type, including non-slide scenes already filtered above.
  if (allowedActions && allowedActions.length > 0) {
    const kept = result.filter((a) => a.type === 'speech' || allowedActions.includes(a.type));
    if (kept.length < result.length) {
      log.info(
        `Stripped ${result.length - kept.length} disallowed action(s) by allowedActions whitelist`,
      );
    }
    result = kept;
  }

  return result;
}