Spaces:

davanstrien
/

ocr-time-capsule

Running

App Files Files Community

ocr-time-capsule / js /reasoning-parser.js

davanstrien's picture

davanstrien HF Staff

Fix reasoning trace parsing for incomplete XML tags

2e33030 4 months ago

history blame contribute delete

8.1 kB

	/**
	* Reasoning Trace Parser
	* Handles parsing and formatting of model reasoning traces from OCR outputs
	*/

	class ReasoningParser {
	  /**
	   * Report whether `text` contains a complete reasoning trace, i.e. both the
	   * opening and the closing tag of at least one supported tag family
	   * (`think`, `thinking`, `reasoning`, `thought`). Tags are matched
	   * case-insensitively and their relative order is not checked.
	   * @param {string} text - The text to check
	   * @returns {boolean} - True only when an opening AND closing tag are present
	   */
	  static detectReasoningTrace(text) {
	    if (!text || typeof text !== 'string') return false;

	    const completePatterns = [
	      { start: /<think>/i, end: /<\/think>/i },
	      { start: /<thinking>/i, end: /<\/thinking>/i },
	      { start: /<reasoning>/i, end: /<\/reasoning>/i },
	      { start: /<thought>/i, end: /<\/thought>/i }
	    ];

	    // An opening tag on its own is an incomplete trace and does not count.
	    return completePatterns.some(({ start, end }) => start.test(text) && end.test(text));
	  }

	  /**
	   * Split model output into its reasoning section and its answer section.
	   *
	   * The reasoning section is the content of the first supported reasoning
	   * tag pair found in `text`. The answer section is taken from the answer
	   * tags paired with that family; if those are absent, the first answer tag
	   * pair of any family is used. When no tag pair matches at all, the whole
	   * text is returned as the answer.
	   * @param {string} text - The text containing reasoning trace
	   * @returns {object} - `{ reasoning, answer, hasReasoning, hasAnswer, original }`;
	   *   `reasoning` / `answer` are trimmed strings or null
	   */
	  static parseReasoningContent(text) {
	    // Empty or non-string input: nothing to parse (and `.match` would throw).
	    if (!text || typeof text !== 'string') {
	      return {
	        reasoning: null,
	        answer: null,
	        hasReasoning: false,
	        hasAnswer: false,
	        original: text
	      };
	    }

	    // Tag families; the list mirrors the families detectReasoningTrace accepts.
	    const patterns = [
	      {
	        start: /<think>/i,
	        end: /<\/think>/i,
	        answerStart: /<answer>/i,
	        answerEnd: /<\/answer>/i
	      },
	      {
	        start: /<thinking>/i,
	        end: /<\/thinking>/i,
	        answerStart: /<answer>/i,
	        answerEnd: /<\/answer>/i
	      },
	      {
	        start: /<reasoning>/i,
	        end: /<\/reasoning>/i,
	        answerStart: /<output>/i,
	        answerEnd: /<\/output>/i
	      },
	      {
	        start: /<thought>/i,
	        end: /<\/thought>/i,
	        answerStart: /<answer>/i,
	        answerEnd: /<\/answer>/i
	      }
	    ];

	    // Lazily capture everything between an opening and a closing tag.
	    const matchBetween = (open, close) =>
	      text.match(new RegExp(open.source + '([\\s\\S]*?)' + close.source, 'i'));

	    let reasoningMatch = null;
	    let pairedAnswerMatch = null;
	    let anyAnswerMatch = null;

	    // Look at every family before deciding: returning as soon as an answer
	    // tag matches would hide reasoning tags that belong to a later family.
	    for (const pattern of patterns) {
	      const candidateAnswer = matchBetween(pattern.answerStart, pattern.answerEnd);
	      anyAnswerMatch = anyAnswerMatch || candidateAnswer;

	      if (!reasoningMatch) {
	        const candidateReasoning = matchBetween(pattern.start, pattern.end);
	        if (candidateReasoning) {
	          reasoningMatch = candidateReasoning;
	          pairedAnswerMatch = candidateAnswer;
	        }
	      }
	    }

	    const answerMatch = pairedAnswerMatch || anyAnswerMatch;

	    if (reasoningMatch || answerMatch) {
	      return {
	        reasoning: reasoningMatch ? reasoningMatch[1].trim() : null,
	        answer: answerMatch ? answerMatch[1].trim() : null,
	        hasReasoning: !!reasoningMatch,
	        hasAnswer: !!answerMatch,
	        original: text
	      };
	    }

	    // An opening tag without its closing tag: warn, then treat the text as plain output.
	    const hasOpeningTag = /<think>|<thinking>|<reasoning>|<thought>/i.test(text);
	    if (hasOpeningTag) {
	      console.warn('Incomplete reasoning trace detected - missing closing tags');
	    }

	    // If no patterns match, return original text as answer
	    return {
	      reasoning: null,
	      answer: text,
	      hasReasoning: false,
	      hasAnswer: true,
	      original: text
	    };
	  }

	  /**
	   * Break raw reasoning text into numbered steps and compute basic counts.
	   *
	   * Steps of the form "N. <bold title> body" are extracted first; the body
	   * runs until the next line that starts with "N. " or until the end of the
	   * text. If none are found, a line-based fallback treats every "N. ..." line
	   * as a step title and appends the following lines to that step's content.
	   * @param {string} reasoningText - The raw reasoning text
	   * @returns {object|null} - `{ steps, rawText, stepCount, characterCount, wordCount }`,
	   *   where each step is `{ number, title, content }`; null for empty input
	   */
	  static formatReasoningSteps(reasoningText) {
	    if (!reasoningText) return null;

	    // NOTE(review): the title delimiters are taken to be Markdown bold markers
	    // (two asterisks on each side) - confirm against the model's trace format.
	    // `(?![\s\S])` is the end-of-input test: JavaScript has no `\z` anchor, and
	    // with the `m` flag `$` would stop at the end of every line.
	    const stepPattern = /^(\d+)\.\s+\*\*(.+?)\*\*(.*?)(?=^\d+\.\s|(?![\s\S]))/gms;
	    const steps = [];
	    let match;

	    while ((match = stepPattern.exec(reasoningText)) !== null) {
	      steps.push({
	        number: match[1],
	        title: match[2].trim(),
	        content: match[3].trim()
	      });
	    }

	    // If no numbered steps found, try to parse by line breaks
	    if (steps.length === 0) {
	      const lines = reasoningText.split('\n').filter(line => line.trim());
	      for (const line of lines) {
	        const numberedMatch = line.match(/^(\d+)\.\s*(.+)/);
	        if (numberedMatch) {
	          steps.push({
	            number: numberedMatch[1],
	            title: numberedMatch[2].replace(/\*\*/g, '').trim(),
	            content: ''
	          });
	        } else if (steps.length > 0) {
	          // Continuation line: attach to the latest step. Lines that appear
	          // before the first numbered line are dropped.
	          const current = steps[steps.length - 1];
	          current.content = current.content ? current.content + '\n' + line : line;
	        }
	      }
	    }

	    return {
	      steps: steps,
	      rawText: reasoningText,
	      stepCount: steps.length,
	      characterCount: reasoningText.length,
	      wordCount: reasoningText.split(/\s+/).filter(w => w).length
	    };
	  }

	  /**
	   * Collect the text that follows well-known labels ("decision:",
	   * "observation:", "note:", "important:", "key finding:") in the reasoning.
	   * Results are grouped by label in that order, not by position in the text.
	   * @param {string} reasoningText - The reasoning text
	   * @returns {array} - Trimmed insight strings; empty when nothing is found
	   */
	  static extractInsights(reasoningText) {
	    if (!reasoningText) return [];

	    // Fresh `g` regex literals on every call, so no `lastIndex` state leaks
	    // between invocations.
	    const labelPatterns = [
	      /decision:\s*(.+)/gi,
	      /observation:\s*(.+)/gi,
	      /note:\s*(.+)/gi,
	      /important:\s*(.+)/gi,
	      /key finding:\s*(.+)/gi
	    ];

	    const insights = [];
	    for (const labelPattern of labelPatterns) {
	      let found;
	      while ((found = labelPattern.exec(reasoningText)) !== null) {
	        insights.push(found[1].trim());
	      }
	    }

	    return insights;
	  }

	  /**
	   * Compute size statistics for a parsed reasoning trace.
	   * @param {object} parsedContent - Parsed reasoning content
	   *   (reads its `reasoning` and `answer` properties)
	   * @returns {object} - Statistics about the reasoning. Without reasoning only
	   *   `hasReasoning`, `reasoningLength`, `answerLength` and `reasoningRatio`
	   *   are present, all zero/false. Otherwise `reasoningRatio` is a percentage
	   *   string with one decimal place (from `toFixed(1)`).
	   */
	  static getReasoningStats(parsedContent) {
	    if (!parsedContent || !parsedContent.reasoning) {
	      return {
	        hasReasoning: false,
	        reasoningLength: 0,
	        answerLength: 0,
	        reasoningRatio: 0
	      };
	    }

	    const countWords = value => value.split(/\s+/).filter(w => w).length;

	    const reasoning = parsedContent.reasoning;
	    const answer = parsedContent.answer;
	    const reasoningLength = reasoning.length;
	    const answerLength = answer ? answer.length : 0;
	    const totalLength = reasoningLength + answerLength;

	    return {
	      hasReasoning: true,
	      reasoningLength: reasoningLength,
	      answerLength: answerLength,
	      totalLength: totalLength,
	      reasoningRatio: totalLength > 0 ? (reasoningLength / totalLength * 100).toFixed(1) : 0,
	      reasoningWords: countWords(reasoning),
	      answerWords: answer ? countWords(answer) : 0
	    };
	  }

	  /**
	   * Build the plain-text export of a parsed trace: an optional reasoning
	   * section wrapped in section banners, followed by the answer.
	   * @param {object} parsedContent - Parsed reasoning content
	   * @param {boolean} includeReasoning - Whether to include reasoning in export
	   * @returns {string} - Formatted text for export ('' when there is no content)
	   */
	  static formatForExport(parsedContent, includeReasoning = true) {
	    if (!parsedContent) return '';

	    const parts = [];

	    if (includeReasoning && parsedContent.reasoning) {
	      parts.push('=== MODEL REASONING ===\n\n');
	      parts.push(parsedContent.reasoning);
	      parts.push('\n\n=== FINAL OUTPUT ===\n\n');
	    }

	    if (parsedContent.answer) {
	      parts.push(parsedContent.answer);
	    }

	    return parts.join('');
	  }
	}

	// Export for use in other scripts: a top-level class declaration does not
	// become a property of the global object, so it is attached explicitly.
	// The guard keeps the file loadable where `window` does not exist
	// (e.g. Node-based tests), instead of throwing a ReferenceError.
	if (typeof window !== 'undefined') {
	  window.ReasoningParser = ReasoningParser;
	}