// Scraped page header (not part of the module):
// Spaces: Nitishkumar-ai / arena-learning — No application file
// File size: 5,470 Bytes — revision c20f20c

/**
 * Action Parser - converts structured JSON Array output to Action[]
 *
 * Bridges the stateless-generate parser (used for online streaming) with the
 * offline generation pipeline, producing typed Action objects that preserve
 * the original interleaving order from the LLM output.
 *
 * For complete (non-streaming) responses, uses JSON.parse with partial-json
 * fallback for robustness.
 */

import type { Action, ActionType } from '@/lib/types/action';
import { SLIDE_ONLY_ACTIONS } from '@/lib/types/action';
import { nanoid } from 'nanoid';
import { parse as parsePartialJson, Allow } from 'partial-json';
import { jsonrepair } from 'jsonrepair';
import { createLogger } from '@/lib/logger';
const log = createLogger('ActionParser');

/**
 * Remove a wrapping markdown code fence from a response string.
 *
 * A leading ```json (case-insensitive) or bare ``` at the very start of the
 * text is dropped, and a trailing ``` at the very end is dropped. Text that
 * carries no fence at those positions is returned unchanged.
 */
function stripCodeFences(text: string): string {
  // Anchored to the start of the string: fence marker, optional "json" tag, trailing whitespace.
  const openingFence = /^```(?:json)?\s*\n?/i;
  // Anchored to the end of the string: optional newline/whitespace, fence marker, trailing whitespace.
  const closingFence = /\n?\s*```\s*$/i;

  const withoutOpening = text.replace(openingFence, '');
  const withoutClosing = withoutOpening.replace(closingFence, '');
  return withoutClosing;
}

/**
 * Parse a complete LLM response in JSON Array format into an ordered Action[] array.
 *
 * Expected format (new):
 * [{"type":"action","name":"spotlight","params":{"elementId":"..."}},
 *  {"type":"text","content":"speech content"},...]
 *
 * Also supports legacy format:
 * [{"type":"action","tool_name":"spotlight","parameters":{"elementId":"..."}},...]
 *
 * Text items become `speech` actions; action items are converted to their
 * respective action types (spotlight, discussion, etc.).
 * The original interleaving order is preserved.
 *
 * Malformed items (non-string text content, action items without a string
 * name) are skipped rather than aborting the whole parse.
 *
 * @param response - Raw LLM output; may be wrapped in markdown code fences and
 *   may contain an unclosed or otherwise malformed JSON array.
 * @param sceneType - When set to anything other than `'slide'`, actions listed
 *   in `SLIDE_ONLY_ACTIONS` are removed.
 * @param allowedActions - When non-empty, only `speech` actions and actions
 *   whose type is in this list are kept. Applied in addition to the
 *   `sceneType` filter.
 * @returns The parsed actions in their original order, or an empty array when
 *   no JSON array can be recovered from `response`.
 */
export function parseActionsFromStructuredOutput(
  response: string,
  sceneType?: string,
  allowedActions?: string[],
): Action[] {
  // Narrowing helpers for untrusted JSON values.
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null && !Array.isArray(value);
  const firstNonEmptyString = (...candidates: unknown[]): string | undefined =>
    candidates.find((c): c is string => typeof c === 'string' && c.length > 0);

  // Step 1: Strip markdown code fences if present
  const cleaned = stripCodeFences(response.trim());

  // Step 2: Find the JSON array range
  const startIdx = cleaned.indexOf('[');
  if (startIdx === -1) {
    log.warn('No JSON array found in response');
    return [];
  }
  const endIdx = cleaned.lastIndexOf(']');
  const jsonStr =
    endIdx > startIdx
      ? cleaned.slice(startIdx, endIdx + 1)
      : cleaned.slice(startIdx); // unclosed array — let partial-json handle it

  // Step 3: Parse — try JSON.parse first, then jsonrepair, fallback to partial-json
  let items: unknown;
  try {
    items = JSON.parse(jsonStr);
  } catch {
    // Try jsonrepair to fix malformed JSON (e.g. unescaped quotes in Chinese text)
    try {
      items = JSON.parse(jsonrepair(jsonStr));
      log.info('Recovered malformed JSON via jsonrepair');
    } catch {
      try {
        items = parsePartialJson(
          jsonStr,
          Allow.ARR | Allow.OBJ | Allow.STR | Allow.NUM | Allow.BOOL | Allow.NULL,
        );
      } catch (e) {
        log.warn('Failed to parse JSON array:', e instanceof Error ? e.message : String(e));
        return [];
      }
    }
  }

  if (!Array.isArray(items)) {
    log.warn('Parsed result is not an array');
    return [];
  }

  // Step 4: Convert items to Action[]
  const actions: Action[] = [];

  for (const item of items) {
    if (!isRecord(item) || !('type' in item)) continue;

    if (item.type === 'text') {
      // Only string content is speech; calling .trim() on a number/object would throw.
      const text = typeof item.content === 'string' ? item.content.trim() : '';
      if (text) {
        actions.push({
          id: `action_${nanoid(8)}`,
          type: 'speech',
          text,
        });
      }
    } else if (item.type === 'action') {
      // Support both new format (name/params) and legacy format (tool_name/parameters)
      const actionName = firstNonEmptyString(item.name, item.tool_name);
      if (actionName === undefined) {
        log.warn('Invalid action item, skipping:', JSON.stringify(item).slice(0, 100));
        continue;
      }

      // Ignore params that are not plain objects: spreading a string or array
      // would inject numeric index keys into the action.
      let actionParams: Record<string, unknown> = {};
      if (isRecord(item.params)) {
        actionParams = item.params;
      } else if (isRecord(item.parameters)) {
        actionParams = item.parameters;
      }

      actions.push({
        // Params are spread first so that a stray `id`/`type` key inside them
        // cannot override the action's identity (and thereby slip past the
        // type-based filters below).
        ...actionParams,
        id: firstNonEmptyString(item.action_id, item.tool_id) ?? `action_${nanoid(8)}`,
        type: actionName as Action['type'],
      } as Action);
    }
  }

  // Step 5: Post-processing — discussion must be the last action, and at most one
  const discussionIdx = actions.findIndex((a) => a.type === 'discussion');
  if (discussionIdx !== -1 && discussionIdx < actions.length - 1) {
    actions.splice(discussionIdx + 1);
  }

  let result = actions;

  // Step 6: Filter out slide-only actions for non-slide scenes (defense in depth)
  if (sceneType && sceneType !== 'slide') {
    const before = result.length;
    const filtered = result.filter((a) => !SLIDE_ONLY_ACTIONS.includes(a.type as ActionType));
    if (filtered.length < before) {
      log.info(`Stripped ${before - filtered.length} slide-only action(s) from ${sceneType} scene`);
    }
    // Fall through instead of returning so the whitelist below still applies.
    result = filtered;
  }

  // Step 7: Filter by allowedActions whitelist (defense in depth for role-based isolation)
  // Catches hallucinated actions not in the agent's permitted set, e.g. a student agent
  // mimicking spotlight/laser after seeing teacher actions in chat history.
  if (allowedActions && allowedActions.length > 0) {
    const before = result.length;
    const filtered = result.filter((a) => a.type === 'speech' || allowedActions.includes(a.type));
    if (filtered.length < before) {
      log.info(
        `Stripped ${before - filtered.length} disallowed action(s) by allowedActions whitelist`,
      );
    }
    result = filtered;
  }

  return result;
}