Spaces:

cybermedia
/

darkmedia-x-api

Sleeping

File size: 3,397 Bytes

343eed9

"""
Analyseur de Structure Intelligent pour DarkMedia-X Studio.
Sépare visuels et narrations par analyse de contenu plutôt que par Regex complexes.
"""
import re
from pathlib import Path

def is_visual(line):
    """Tell whether a line reads like an image / shot description.

    The check is a case-insensitive substring search: the line is visual
    as soon as it contains one of the technical markers below, or starts
    with a ``- visual`` bullet label.
    """
    lowered = line.lower()

    # Explicit bullet label for a prompt.
    if lowered.startswith('- visual'):
        return True

    # Technical vocabulary that signals a shot or prompt description.
    markers = (
        'plan ', 'vue ', 'image', 'camera', 'visual', 'prompt', 'visuel',
        '--ar', 'style', 'cinématique', 'gros plan', 'trajectoire',
        'fond noir', 'ambiance', 'texture', 'éclairage',
    )
    for marker in markers:
        if marker in lowered:
            return True
    return False

def is_narration(line):
    """Tell whether a line is meant to be read aloud.

    A line counts as narration when, once surrounding whitespace and
    Markdown decoration (list bullets, ``*``/``_`` emphasis, ``>``) are
    ignored, it is either

    * entirely wrapped in quotation marks (``"…"``, ``«…»``, ``„…“``,
      ``“…”``), or
    * introduced by a speech label such as ``Narration`` or ``Audio``
      (case-insensitive).

    Args:
        line: A single line of text.

    Returns:
        bool: True if the line looks like narration.
    """
    # Ignore decoration so that "  Narration: ...", "- Audio: ..." and
    # "**Narration :** ..." are treated like the bare label.
    core = line.strip().lstrip("*_-•> \t").rstrip("*_ \t")

    if re.match(r'^[«"„“].*[»"”]$', core):
        return True

    labels = (
        'narration', "l'histoire", 'l’histoire', 'paroles', 'audio', 'texte',
    )
    return core.lower().startswith(labels)

# Leading list bullets ("- ", "* ", "• ", "> "). The marker must be followed
# by whitespace so that flags such as "--ar" are left alone.
_BULLET_PREFIX_RE = re.compile(r"^\s*(?:[-*•>]\s+)+")

# A label at the very start of the line, optionally wrapped in emphasis
# markers. Two shapes are accepted:
#   * one to three letter-only words followed by a colon ("Narration :",
#     "**Visual Prompt :**"), but not a URL scheme ("https://");
#   * one of the module's known labels followed by a dash set off by
#     whitespace ("Narration - ...").
# Digits are not allowed in a label, so "16:9" or "Il est 16:30" never match.
_LABEL_PREFIX_RE = re.compile(
    r"^[\s*_]*"
    r"(?:"
    r"[^\W\d_]+(?:[ '’][^\W\d_]+){0,2}[\s*_]*:(?!//)"
    r"|"
    r"(?:narration|l['’]histoire|paroles|audio|texte"
    r"|visual prompt|visual|visuel|prompt|image)"
    r"[*_]*\s+[-–—](?=\s)"
    r")\s*",
    re.IGNORECASE,
)

# Quotation marks removed from the text (same set is_narration recognises).
_QUOTE_CHARS = str.maketrans("", "", '"«»„“”')


def clean_content(text):
    """Strip Markdown noise and a leading label from one line.

    Removes, in order: leading list bullets, a leading label
    (e.g. ``Narration :``), ``**`` bold markers and quotation marks.
    Only a label at the start of the line is removed; hyphens and colons
    inside the sentence (``porte-avions``, ``--ar 16:9``) are preserved.

    Args:
        text: The line to clean; may be empty or None.

    Returns:
        str: The cleaned text, or ``""`` when *text* is falsy.
    """
    if not text:
        return ""
    text = _BULLET_PREFIX_RE.sub("", text)
    text = _LABEL_PREFIX_RE.sub("", text)
    return text.replace("**", "").translate(_QUOTE_CHARS).strip()

def normalize_content(content):
    """Rewrite a script so each ``##`` scene has one prompt and one narration.

    The text is split on ``##`` headers. Whatever precedes the first header
    is kept as-is (stripped). For every scene, each non-empty body line is
    routed to a visual pool or a narration pool, cleaned with
    ``clean_content``, and the scene is re-emitted as::

        ## <header>
        **Visual Prompt :** <visual fragments joined by spaces>
        **Narration :** "<narration fragments joined by spaces>"

    Routing order for a line:

    1. explicit narration markers (quotes or a speech label, tested with
       Markdown decoration removed) -> narration;
    2. visual keywords (``is_visual``) -> visual;
    3. fallback: short lines without final punctuation -> visual,
       everything else -> narration.

    Narration markers are tested first because visual keywords are common
    words ("image", "style", ...): a line such as
    ``Narration : "L'image était floue."`` must stay narration. This also
    lets the function re-read its own ``**Narration :**`` output without
    moving the narration into the prompt.

    Args:
        content: Full text of the script.

    Returns:
        str: The normalized text, stripped of surrounding whitespace.
    """
    # Preliminary global cleanup: chat-export lines and "(12-15s)" timings.
    content = re.sub(r"Gemini said.*?\n", "", content, flags=re.DOTALL)
    content = re.sub(r"\(\d+-\d+s\)", "", content)

    # Split into scenes on "##" headers; parts[0] is the text before the
    # first header.
    parts = re.split(r"(?:\n|^)##\s*", content)
    intro = parts[0].strip()
    chunks = [intro + "\n\n"] if intro else []

    for part in parts[1:]:
        if not part.strip():
            continue

        raw_header, *body_lines = part.split('\n')
        header = raw_header.strip()

        visual_pool = []
        narration_pool = []

        for raw_line in body_lines:
            line = raw_line.strip()
            if not line or line.startswith('##'):
                continue

            # The line without bullets / emphasis markers, used only to
            # detect narration markers hidden behind Markdown decoration.
            probe = line.lstrip("*_-•> \t").rstrip("*_ \t")
            if not probe:
                # Pure decoration (e.g. a "---" rule): nothing to keep.
                continue

            if is_narration(probe):
                pool = narration_pool
            elif is_visual(line):
                pool = visual_pool
            elif len(line) < 50 and not line.endswith(('.', '!', '?', '"')):
                # Final heuristic: short and without closing punctuation
                # is probably a visual note.
                pool = visual_pool
            else:
                pool = narration_pool

            cleaned = clean_content(line)
            if cleaned:
                # Skip fragments that were only a label, so joining does
                # not introduce stray spaces.
                pool.append(cleaned)

        visual_text = " ".join(visual_pool)
        narration_text = " ".join(narration_pool)

        chunks.append(
            f"## {header}\n"
            f"**Visual Prompt :** {visual_text}\n"
            f"**Narration :** \"{narration_text}\"\n\n"
        )

    return "".join(chunks).strip()

def normalize_file(file_path):
    """Normalize a script file in place.

    Reads the file as UTF-8, runs ``normalize_content`` on it and writes
    the result back only when it differs from the current content
    (ignoring surrounding whitespace).

    Args:
        file_path: Path of the file to normalize (str or path-like).

    Returns:
        bool: True if the file was rewritten; False if it was already
        normalized or if *file_path* is not an existing regular file.
    """
    path = Path(file_path)
    # is_file() rather than exists(): a directory "exists" but reading it
    # would raise instead of returning False.
    if not path.is_file():
        return False

    original = path.read_text(encoding="utf-8")
    normalized = normalize_content(original)
    if normalized.strip() == original.strip():
        return False

    path.write_text(normalized, encoding="utf-8")
    return True