Spaces:

Danielsz
/

testing

Sleeping

testing / interview /phrases_data.py

deploy

16e8be2 7 days ago

1.76 kB

	"""
	Interview phrases data loader.
	Reads data/transcripciones.json (under BASE_DIR) and builds the list of structured phrase dicts.
	"""

	import json
	import logging
	import os
	import re
	from django.conf import settings

	# Module logger; the name is given explicitly rather than derived from __name__.
	log = logging.getLogger("interview.phrases_data")

	# Path of the transcriptions JSON: <BASE_DIR>/data/transcripciones.json.
	TRANSCRIPTIONS_FILE = os.path.join(
	    settings.BASE_DIR,
	    "data",
	    "transcripciones.json",
	)


	def _numeric_id_key(phrase: dict) -> int:
	"""Extract the numeric index from a phrase id for sorting."""
	match = re.search(r"(\d+)", phrase["id"])
	return int(match.group(1)) if match else 0


	def setup_data() -> list[dict]:
	    """
	    Read transcripciones.json and return the phrases it contains.

	    The file is read as a JSON object whose items are iterated as
	    (filename, text) pairs. Each pair becomes a dict with the keys:
	    {id, text, category, sentiment, original_file}

	    - id: the filename with a trailing ".mp4" extension removed.
	    - text: the transcription with surrounding whitespace stripped.
	    - category / sentiment: derived from the filename prefix
	      ("preguntas_" -> questions / neutral,
	       "positivas_" -> feedback / positive,
	       "negativos_" -> feedback / negative,
	       anything else -> unknown / neutral).
	    - original_file: the filename exactly as it appears in the JSON.

	    Returns:
	        The phrase dicts in the order the entries appear in the file, or
	        an empty list (after logging an error) when the file is missing.

	    Raises:
	        json.JSONDecodeError: if the file exists but is not valid JSON.
	    """
	    # Open directly rather than checking os.path.exists() first, so there
	    # is no window in which the file can vanish between check and open.
	    try:
	        with open(TRANSCRIPTIONS_FILE, "r", encoding="utf-8") as f:
	            transcriptions = json.load(f)
	    except FileNotFoundError:
	        log.error("Transcriptions file not found: %s", TRANSCRIPTIONS_FILE)
	        return []

	    phrases = []

	    for filename, text in transcriptions.items():
	        # Defaults apply to any filename without a recognised prefix.
	        category = "unknown"
	        sentiment = "neutral"
	        if filename.startswith("preguntas_"):
	            category = "questions"
	        elif filename.startswith("positivas_"):
	            category = "feedback"
	            sentiment = "positive"
	        elif filename.startswith("negativos_"):
	            category = "feedback"
	            sentiment = "negative"

	        phrases.append({
	            # Strip only a trailing extension; str.replace would also
	            # delete ".mp4" occurring in the middle of the name.
	            "id": filename.removesuffix(".mp4"),
	            # NOTE(review): assumes every value is a string; a null
	            # transcription would raise here - confirm with the producer.
	            "text": text.strip(),
	            "category": category,
	            "sentiment": sentiment,
	            "original_file": filename,
	        })

	    log.info("Loaded %d phrases from transcriptions.", len(phrases))
	    return phrases