Malaji71
Portada robusta (limpia comillas, detecta fecha por patrón, contact por teléfono/email) + IA anti-alucinaciones (Qwen2.5-72B, temp 0.1, prompt estricto)
228497a | // parser.js — Convierte un archivo .fdx (XML) en un JSON estructurado. | |
// Schema version stamped on every parse result as `tramoya_version`.
const TRAMOYA_VERSION = '1.0';

// Paragraph `Type` values this parser recognises. The set is not referenced
// elsewhere in the visible portion of this file.
const KNOWN_TYPES = new Set([
  'Scene Heading', 'Action', 'Character', 'Parenthetical',
  'Dialogue', 'Transition', 'Shot', 'General'
]);
/**
 * Parses the XML text of a Final Draft `.fdx` file into a structured object.
 *
 * @param {string} xmlText - Raw contents of the `.fdx` file.
 * @returns {{tramoya_version: string, source_format: string, generated_at: string,
 *   title_page: object, scenes: object[], characters: string[], stats: object}}
 *   The parsed screenplay; `generated_at` is the ISO timestamp of this call.
 * @throws {Error} If the text is not well-formed XML or has no <FinalDraft> element.
 */
export function parseFdx(xmlText) {
  const doc = new DOMParser().parseFromString(xmlText, 'application/xml');

  // DOMParser reports XML syntax errors through a <parsererror> element
  // instead of throwing.
  if (doc.querySelector('parsererror')) {
    throw new Error('El archivo no es un XML válido. Asegúrate de que es un .fdx generado por Final Draft.');
  }

  if (!doc.querySelector('FinalDraft')) {
    throw new Error('No se ha encontrado el elemento <FinalDraft>. ¿Es un archivo .fdx auténtico?');
  }

  const titlePage = parseTitlePage(doc);

  // Only the script body: the <Content> that is a direct child of <FinalDraft>.
  // A bare 'Content > Paragraph' would also match any <Content> nested
  // elsewhere (Final Draft title pages carry their own), and those paragraphs
  // would end up appended to the last scene as action elements.
  const paragraphs = Array.from(
    doc.querySelectorAll('FinalDraft > Content > Paragraph')
  );

  const scenes = buildScenes(paragraphs);
  fillStartPages(scenes);

  return {
    tramoya_version: TRAMOYA_VERSION,
    source_format: 'fdx',
    generated_at: new Date().toISOString(),
    title_page: titlePage,
    scenes,
    characters: collectCharacters(scenes),
    stats: computeStats(scenes, paragraphs)
  };
}
/**
 * Extracts title, author, contact and draft date from the <TitlePage> element
 * using text heuristics over its non-empty paragraphs.
 *
 * @param {Document} doc - Parsed FDX document.
 * @returns {{title: ?string, author: ?string, contact: ?string,
 *   draft_date: ?string, raw: string}} Detected fields (null when not found)
 *   plus `raw`, the non-empty title-page lines joined with newlines.
 */
function parseTitlePage(doc) {
  const tp = doc.querySelector('TitlePage');
  if (!tp) {
    return { title: null, author: null, contact: null, draft_date: null, raw: '' };
  }

  // One trimmed entry per paragraph that has any text.
  const lines = [];
  tp.querySelectorAll('Paragraph').forEach((p) => {
    const text = collectText(p).trim();
    if (text) lines.push(text);
  });
  const raw = lines.join('\n');

  // Returns the value attached to the first line that carries one of `labels`
  // (lower-case strings), or null when no line does.
  const findAfter = (labels) => {
    // Compile each label's inline pattern once rather than once per line.
    const matchers = labels.map((label) => ({
      label,
      // Line starts with label + separator (".", ":", "-", "—", "–") + content.
      inline: new RegExp(`^${escapeRegex(label)}\\s*[.:\\-—–]\\s*(.+)$`, 'i')
    }));

    for (let i = 0; i < lines.length; i++) {
      const trimmed = lines[i].trim();
      const stripped = trimmed.toLowerCase().replace(/[:.\s]+$/, '');
      for (const { label, inline } of matchers) {
        // The line is exactly the label (e.g. "Por", "By"): take the next line.
        if (stripped === label) {
          if (lines[i + 1]) return lines[i + 1].trim();
          continue;
        }
        const m = trimmed.match(inline);
        if (m) return m[1].trim();
      }
    }
    return null;
  };

  // Removes wrapping quotes ("MIGUEL" -> MIGUEL); FDX sometimes stores them
  // literally inside <Text>.
  const stripQuotes = (s) => {
    if (!s) return s;
    return s.trim().replace(/^["“”'`]+|["“”'`]+$/g, '').trim();
  };

  // Date detection by pattern (DD/MM/YY, DD-MM-YYYY, ...) for title pages that
  // have no explicit label in front of the date.
  const dateRegex = /\b\d{1,2}[\/\-.]\d{1,2}[\/\-.]\d{2,4}\b/;
  const findDateInLines = () => {
    for (const line of lines) {
      if (dateRegex.test(line)) return line.trim();
    }
    return null;
  };

  const phoneRegex = /\+?\d[\d\s\-().]{6,}/g;
  const emailRegex = /\S+@\S+\.\S+/;
  const pureDateRegex = new RegExp(`^${dateRegex.source}$`);

  // True when the line holds a phone-like digit run that is not merely a date.
  // Dash/dot separated dates such as "12-03-2024" also satisfy the phone
  // pattern; without this check a date line would be reported as the contact.
  const hasPhone = (line) => {
    for (const [candidate] of line.matchAll(phoneRegex)) {
      const core = candidate.replace(/^[\s().-]+|[\s().-]+$/g, '');
      if (!pureDateRegex.test(core)) return true;
    }
    return false;
  };

  // Contact detection: the last line holding an email or phone, together with
  // up to 3 preceding lines (assumed to be name/address).
  const findContact = () => {
    let idx = -1;
    for (let i = lines.length - 1; i >= 0; i--) {
      if (emailRegex.test(lines[i]) || hasPhone(lines[i])) {
        idx = i;
        break;
      }
    }
    if (idx < 0) return null;
    const start = Math.max(0, idx - 3);
    return lines.slice(start, idx + 1).join('\n');
  };

  // The first non-empty line is taken as the title.
  const title = stripQuotes(lines[0] || null);
  const author = findAfter([
    'por', 'by', 'written by', 'escrito por', 'autor', 'author', 'guion', 'guión'
  ]);
  const contact = findAfter(['contact', 'contacto']) || findContact();
  const draftDate =
    findAfter(['draft date', 'date', 'fecha', 'ver', 'version', 'versión']) ||
    findDateInLines();

  return { title, author, contact, draft_date: draftDate, raw };
}
/**
 * Concatenates, in document order and without separators, the text content of
 * every descendant `Text` element of `node`.
 *
 * @param {Element} node - Element to read from.
 * @returns {string} Joined text; empty string when there are no `Text` elements.
 */
function collectText(node) {
  const pieces = Array.from(
    node.querySelectorAll('Text'),
    (textEl) => textEl.textContent || ''
  );
  return pieces.join('');
}
/**
 * Backslash-escapes regular-expression metacharacters so that `s` can be
 * embedded literally in a `RegExp` source string.
 *
 * @param {string} s - Text to escape.
 * @returns {string} The escaped text.
 */
function escapeRegex(s) {
  const escapeChar = (metaChar) => `\\${metaChar}`;
  return s.replace(/[.*+?^${}()|[\]\\]/g, escapeChar);
}
/**
 * Forward-fills missing `start_page` values in place.
 *
 * Final Draft omits Page="1" on the first scene. When at least one scene has a
 * page number, the first scene is assumed to start on page 1 and every scene
 * without a page inherits the page of the scene before it. When no scene has a
 * page number the list is left untouched, so that no numbers are invented.
 *
 * @param {Array<{start_page: ?number}>} scenes - Scenes to update (mutated).
 * @returns {void}
 */
function fillStartPages(scenes) {
  const hasPage = (scene) => Number.isFinite(scene.start_page);
  if (!scenes.length || !scenes.some(hasPage)) return;

  let inherited = 1; // page assumed for a leading scene without a page
  scenes.forEach((scene) => {
    if (hasPage(scene)) {
      inherited = scene.start_page;
    } else {
      scene.start_page = inherited;
    }
  });
}
/**
 * Groups a flat list of FDX <Paragraph> elements into scenes.
 *
 * Each 'Scene Heading' paragraph opens a new scene; the paragraphs that follow
 * become that scene's `elements`. A Character paragraph and the Parenthetical
 * and Dialogue paragraphs after it are merged into a single `character`
 * element. Paragraphs that appear before the first scene heading are dropped.
 *
 * @param {Iterable<Element>} paragraphs - Paragraph elements in document order.
 * @returns {object[]} Scenes, numbered from 1 in order of appearance.
 */
function buildScenes(paragraphs) {
  // Paragraph types that map one-to-one onto a `{ type, text }` element.
  const SIMPLE_ELEMENTS = new Map([
    ['Action', 'action'],
    ['General', 'action'],
    ['Transition', 'transition'],
    ['Shot', 'shot']
  ]);

  const scenes = [];
  let scene = null;   // scene currently receiving elements
  let speech = null;  // character element still being assembled
  let sceneCount = 0;

  // Pushes the pending character element (if any) into the open scene.
  const closeSpeech = () => {
    if (speech && scene) scene.elements.push(speech);
    speech = null;
  };

  for (const p of paragraphs) {
    const type = p.getAttribute('Type') || 'General';
    const text = collectText(p).trim();

    if (type === 'Scene Heading') {
      // A pending speech belongs to the scene that is about to be closed.
      closeSpeech();
      sceneCount += 1;
      const { interior_exterior, location, time_of_day } = parseSceneHeading(text);
      const pageAttr = p.querySelector('SceneProperties')?.getAttribute('Page');
      const page = pageAttr ? parseInt(pageAttr, 10) : NaN;
      scene = {
        scene_number: sceneCount,
        heading: text,
        interior_exterior,
        location,
        time_of_day,
        start_page: Number.isFinite(page) ? page : null,
        elements: []
      };
      scenes.push(scene);
      continue;
    }

    // Text before the first scene: ignored to keep the structure clean.
    if (!scene) continue;

    switch (type) {
      case 'Character':
        closeSpeech();
        speech = { type: 'character', name: text, parenthetical: null, dialogue: '' };
        break;

      case 'Parenthetical':
        if (speech) {
          // Stored without the surrounding parentheses.
          speech.parenthetical = text.replace(/^\(/, '').replace(/\)$/, '');
        } else {
          // Orphan parenthetical: kept verbatim as an action.
          scene.elements.push({ type: 'action', text });
        }
        break;

      case 'Dialogue':
        if (speech) {
          // Consecutive dialogue paragraphs are joined with a single space.
          speech.dialogue = speech.dialogue ? `${speech.dialogue} ${text}` : text;
        } else {
          // Orphan dialogue: stored as a fragment without a character.
          scene.elements.push({
            type: 'character',
            name: null,
            parenthetical: null,
            dialogue: text
          });
        }
        break;

      default:
        // Any other paragraph type ends the speech in progress.
        closeSpeech();
        if (SIMPLE_ELEMENTS.has(type)) {
          scene.elements.push({ type: SIMPLE_ELEMENTS.get(type), text });
        } else {
          // Unknown type: preserved together with its original type name.
          scene.elements.push({ type: 'unknown', unknown_type: type, text });
        }
    }
  }

  // Flush the last pending speech.
  closeSpeech();
  return scenes;
}
/**
 * Splits a scene heading such as "INT. CASA - DÍA" into its parts.
 *
 * Recognised prefixes: INT, EXT, INT/EXT, EXT/INT, INT-EXT and EXT-INT, with
 * optional dots, in any letter case. The time of day is whatever follows the
 * last " - ", " – " or " — " in the remainder of the heading.
 *
 * @param {string} heading - Trimmed heading text.
 * @returns {{interior_exterior: ?string, location: ?string, time_of_day: ?string}}
 *   All fields are null when the heading is empty or has no recognised prefix.
 */
function parseSceneHeading(heading) {
  const result = { interior_exterior: null, location: null, time_of_day: null };
  if (!heading) return result;

  const match = heading.match(/^(INT\.?\/EXT\.?|EXT\.?\/INT\.?|INT-EXT\.?|EXT-INT\.?|INT\.?|EXT\.?)\s*[-.\s]\s*(.*)$/i);
  if (!match) return result;

  const [, rawPrefix, remainder] = match;
  const prefix = rawPrefix.toUpperCase().replace(/[.\s]+/g, '');
  const rest = remainder.trim();

  // Position of the right-most dash separator; every variant is 3 characters long.
  const cut = [' - ', ' – ', ' — '].reduce(
    (best, separator) => Math.max(best, rest.lastIndexOf(separator)),
    -1
  );

  result.interior_exterior = prefix || null;
  if (cut === -1) {
    result.location = rest || null;
  } else {
    result.location = rest.slice(0, cut).trim() || null;
    result.time_of_day = rest.slice(cut + 3).trim() || null;
  }
  return result;
}
/**
 * Lists the distinct speaking characters across all scenes.
 *
 * Only `character` elements with a non-empty name are considered; names are
 * normalised with `normalizeCharacterName` before de-duplication.
 *
 * @param {Array<{elements: object[]}>} scenes - Parsed scenes.
 * @returns {string[]} Unique names in default (code-unit) sort order.
 */
function collectCharacters(scenes) {
  const names = scenes
    .flatMap((scene) => scene.elements)
    .filter((element) => element.type === 'character' && element.name)
    .map((element) => normalizeCharacterName(element.name));
  return [...new Set(names)].sort();
}
/**
 * Strips trailing parenthesised annotations such as "(O.S.)", "(V.O.)" or
 * "(CONT'D)" from a character cue so that all cues of one character group
 * under the same name.
 *
 * Every consecutive trailing annotation is removed, so
 * "MIGUEL (V.O.) (CONT'D)" becomes "MIGUEL". Parentheses that are not at the
 * end of the cue are left alone.
 *
 * @param {string} raw - Character cue as written in the script.
 * @returns {string} The bare, trimmed character name.
 */
function normalizeCharacterName(raw) {
  return raw.replace(/(?:\s*\([^)]*\))+\s*$/, '').trim();
}
/**
 * Computes summary statistics for a parsed screenplay.
 *
 * @param {object[]} scenes - Scenes as produced by `buildScenes`.
 * @param {Element[]} paragraphs - Source paragraphs; currently unused and kept
 *   only for signature compatibility.
 * @returns {{scene_count: number, interior_count: number, exterior_count: number,
 *   day_count: number, night_count: number, character_count: number,
 *   dialogue_count: number, pages: ?number}} `pages` is the highest
 *   `start_page` found, or null when no scene has a page number.
 */
function computeStats(scenes, paragraphs) {
  // `DIA(?!NOCHE)` keeps "MEDIANOCHE" (midnight) from matching the day list
  // through its embedded "DIA"; it is still counted as night via "NOCHE".
  const dayPattern = /(DÍA|DIA(?!NOCHE)|DAY|MAÑANA|MANANA|MORNING|TARDE|AFTERNOON)/;
  const nightPattern = /(NOCHE|NIGHT|MADRUGADA)/;

  let interior = 0;
  let exterior = 0;
  let day = 0;
  let night = 0;
  let dialogueCount = 0;
  let maxPage = 0;

  for (const scene of scenes) {
    // Mixed prefixes count once, according to how they start
    // ("INT/EXT" as interior, "EXT/INT" as exterior).
    const ie = scene.interior_exterior || '';
    if (ie.startsWith('INT')) interior += 1;
    if (ie.startsWith('EXT')) exterior += 1;

    // A time of day naming both (e.g. "DÍA/NOCHE") counts in both tallies.
    const tod = (scene.time_of_day || '').toUpperCase();
    if (dayPattern.test(tod)) day += 1;
    if (nightPattern.test(tod)) night += 1;

    if (Number.isFinite(scene.start_page) && scene.start_page > maxPage) {
      maxPage = scene.start_page;
    }

    // Only character elements that actually carry dialogue text are counted.
    for (const el of scene.elements) {
      if (el.type === 'character' && el.dialogue) dialogueCount += 1;
    }
  }

  // Real page count: the highest Page declared in the FDX SceneProperties.
  // When Final Draft has not paginated the script, `pages` stays null.
  const pages = maxPage > 0 ? maxPage : null;

  return {
    scene_count: scenes.length,
    interior_count: interior,
    exterior_count: exterior,
    day_count: day,
    night_count: night,
    character_count: collectCharacters(scenes).length,
    dialogue_count: dialogueCount,
    pages
  };
}