Spaces:

cybermedia
/

darkmedia-x-api

Sleeping

App Files Files Community

darkmedia-x-api / engine /utils /normalizer.py

cybermedia

Upload folder using huggingface_hub

343eed9 verified 24 days ago

raw

history blame contribute delete

3.4 kB

	"""
	Analyseur de Structure Intelligent pour DarkMedia-X Studio.
	Sépare visuels et narrations par analyse de contenu plutôt que par Regex complexes.
	"""
	import re
	from pathlib import Path

	def is_visual(line):
	    """Return True when ``line`` looks like an image / shot description.

	    The decision is a case-insensitive substring search: the line is
	    considered visual as soon as it contains one of the technical keywords
	    below (shot vocabulary, prompt labels, the ``--ar`` aspect-ratio flag,
	    lighting/texture terms, ...).

	    Args:
	        line: A single line of text.

	    Returns:
	        bool: True if any keyword occurs anywhere in the lower-cased line.
	    """
	    # NOTE: an explicit "starts with '- visual'" test used to follow the
	    # keyword scan; it could never change the result because 'visual' is
	    # itself one of the substrings searched for, so it has been dropped.
	    keywords = (
	        'plan ', 'vue ', 'image', 'camera', 'visual', 'prompt', 'visuel',
	        '--ar', 'style', 'cinématique', 'gros plan', 'trajectoire',
	        'fond noir', 'ambiance', 'texture', 'éclairage',
	    )
	    lowered = line.lower()
	    return any(keyword in lowered for keyword in keywords)

	def is_narration(line):
	    """Return True when ``line`` is meant to be read aloud.

	    A line counts as narration when, once surrounding whitespace is removed,
	    it is either:

	    * entirely wrapped in quotation marks (``"…"``, ``«…»``, ``„…“``,
	      ``“…”``), or
	    * introduced by a speech label (``narration``, ``l'histoire``,
	      ``paroles``, ``audio``, ``texte``), compared case-insensitively.

	    Leading Markdown decoration (list bullets, ``*``/``_`` emphasis, ``>``)
	    is ignored before the label comparison, so ``**Narration:** …`` and
	    ``- Audio: …`` are recognised as well as the bare ``Narration : …``.

	    Args:
	        line: A single line of text.

	    Returns:
	        bool: True if the line matches one of the two rules above.
	    """
	    stripped = line.strip()

	    # Fully quoted line -> spoken text.
	    if re.match(r'^[«"„“].*[»"”]$', stripped):
	        return True

	    # Use the same stripped text for the label test (the quote test already
	    # did), and skip Markdown markers that may precede the label.
	    candidate = stripped.lstrip('*_-•> \t').lower()
	    # Treat the typographic apostrophe like the ASCII one ("l’histoire").
	    candidate = candidate.replace('’', "'")
	    labels = ('narration', "l'histoire", 'paroles', 'audio', 'texte')
	    return candidate.startswith(labels)

	# One "word" of a label: starts with a letter, may contain letters and
	# apostrophes (e.g. "l'histoire").
	_LABEL_WORD = r"[^\W\d_](?:[^\W\d_]|['’])*"

	# Leading label such as "Narration :", "**Visual Prompt:**" or "- Audio - ":
	# optional list bullet, optional Markdown emphasis, one to three words, then
	# a colon (or a dash preceded and followed by whitespace).
	_LABEL_PREFIX_RE = re.compile(
	    r"^\s*(?:[-*•]\s+)?[*_]*"
	    + _LABEL_WORD + r"(?:[ \t]+" + _LABEL_WORD + r"){0,2}"
	    + r"[ \t]*[*_]*(?:[ \t]*:|[ \t]+[-–—](?=\s))[*_]*\s*"
	)

	# Quotation marks removed from the cleaned text.
	_QUOTE_CHARS = str.maketrans("", "", "\"«»“”„")


	def clean_content(text):
	    """Strip the leading label and Markdown/quote noise from ``text``.

	    Steps:

	    1. Remove a short label at the start of the line (for example
	       ``Narration :`` or ``**Visual Prompt:**``). The label is limited to
	       three words made of letters so that ordinary sentences containing a
	       colon further along, or flags such as ``--ar 16:9``, are left alone.
	    2. Remove ``**`` bold markers and every straight, angle or curly double
	       quotation mark.

	    The previous label pattern accepted at most one character before the
	    separator, so real labels were never removed.

	    Args:
	        text: The line to clean; may be empty or ``None``.

	    Returns:
	        str: The cleaned text with surrounding whitespace removed, or ``""``
	        when ``text`` is falsy.
	    """
	    if not text:
	        return ""
	    without_label = _LABEL_PREFIX_RE.sub("", text, count=1).strip()
	    cleaned = without_label.replace("**", "").translate(_QUOTE_CHARS)
	    return cleaned.strip()

	def normalize_content(content):
	    """Rewrite a script so every ``##`` scene has one visual and one narration line.

	    Processing steps:

	    1. Remove any "Gemini said ..." remainder-of-line and bracketed time
	       ranges such as ``(0-5s)`` / ``[10-15s]``.
	    2. Split the text on ``##`` headings found at the start of a line. The
	       text before the first heading is kept as an introduction.
	    3. For every scene, classify each non-empty body line as visual or
	       narration using ``is_visual`` / ``is_narration``; lines matching
	       neither fall back to a length/punctuation heuristic. Each line is
	       passed through ``clean_content`` before being pooled.
	    4. Emit ``## <header>``, ``Visual Prompt : …`` and ``Narration : "…"``
	       for each scene, pooled lines being joined with single spaces.

	    Args:
	        content: The raw script text.

	    Returns:
	        str: The normalised script, stripped of surrounding whitespace.
	    """
	    # Preliminary global clean-up.
	    content = re.sub(r"Gemini said[^\n]*\n?", "", content)
	    # NOTE(review): the previous pattern was anchored with `$` on both sides
	    # and could never match; a parenthesised/bracketed "<n>-<m>s" range is
	    # assumed to be the intended timestamp format — confirm against real data.
	    content = re.sub(r"[(\[]\d+\s*-\s*\d+\s*s[)\]]", "", content)

	    # Split into scenes. The alternation must be a real `|`: the escaped
	    # `\|` that was here matched a literal pipe, so nothing was ever split.
	    parts = re.split(r"(?:\n|^)##\s*", content)
	    intro = parts[0].strip()
	    chunks = [intro + "\n\n"] if intro else []

	    for part in parts[1:]:
	        if not part.strip():
	            continue

	        header, *body_lines = part.split("\n")
	        visual_pool = []
	        narration_pool = []

	        for raw_line in body_lines:
	            line = raw_line.strip()
	            # Skip blanks and stray (indented) headings.
	            if not line or line.startswith("##"):
	                continue

	            if is_visual(line):
	                target = visual_pool
	            elif is_narration(line):
	                target = narration_pool
	            elif len(line) < 50 and not line.endswith((".", "!", "?", '"')):
	                # Last-resort heuristic: short and without closing
	                # punctuation is probably a visual cue...
	                target = visual_pool
	            else:
	                # ...otherwise treat it as narration.
	                target = narration_pool
	            target.append(clean_content(line))

	        # Clean re-synthesis of the scene.
	        visual_text = " ".join(visual_pool)
	        narration_text = " ".join(narration_pool)
	        chunks.append(
	            f"## {header.strip()}\n"
	            f"Visual Prompt : {visual_text}\n"
	            f"Narration : \"{narration_text}\"\n\n"
	        )

	    return "".join(chunks).strip()

	def normalize_file(file_path):
	    """Normalise a script file in place.

	    The file is read as UTF-8, passed through ``normalize_content`` and
	    written back only when the normalised text differs from the original
	    (ignoring surrounding whitespace).

	    Args:
	        file_path: Path of the file to normalise (``str`` or ``Path``).

	    Returns:
	        bool: True if the file was rewritten; False if the path is not an
	        existing regular file or the content was already normalised.
	    """
	    path = Path(file_path)
	    # is_file() rather than exists(): a directory "exists" but reading it
	    # as text would raise instead of returning False.
	    if not path.is_file():
	        return False

	    content = path.read_text(encoding="utf-8")
	    normalized = normalize_content(content)
	    if normalized.strip() == content.strip():
	        return False

	    path.write_text(normalized, encoding="utf-8")
	    return True