Spaces:
Sleeping
Sleeping
"""
Configuration Module
====================
Handles loading and managing configuration for the entire system.
"""
| from __future__ import annotations | |
| import os | |
| from dataclasses import dataclass, field | |
| from typing import List, Optional | |
| import yaml | |
@dataclass
class VADConfig:
    """Voice Activity Detection configuration.

    Declared as a dataclass so each instance carries its own copy of the
    settings and individual values can be overridden via keyword arguments.
    """

    threshold: float = 0.5
    # NOTE(review): the two durations are presumably in seconds - confirm.
    min_speech_duration: float = 0.3
    min_silence_duration: float = 0.3
    # Padding in milliseconds, per the field name.
    speech_pad_ms: int = 30
@dataclass
class SegmentationConfig:
    """Segmentation configuration.

    Declared as a dataclass so the settings are real instance attributes
    and can be overridden per instance.
    """

    # NOTE(review): all three values are presumably in seconds - confirm.
    window_duration: float = 1.5
    window_hop: float = 0.75
    min_segment_duration: float = 0.5
@dataclass
class EmbeddingConfig:
    """Speaker embedding configuration.

    Declared as a dataclass so the settings are real instance attributes
    and can be overridden per instance.
    """

    # Identifier of the embedding model to use.
    model_id: str = "speechbrain/spkrec-ecapa-voxceleb"
    # NOTE(review): presumably must match the output size of `model_id` - confirm.
    embedding_dim: int = 192
@dataclass
class ClusteringConfig:
    """Clustering configuration.

    Declared as a dataclass so the settings are real instance attributes
    and can be overridden per instance.
    """

    method: str = "agglomerative"
    threshold: float = 0.7
    min_cluster_size: int = 2
    linkage: str = "average"
@dataclass
class AudioConfig:
    """Audio processing configuration.

    Declared as a dataclass so the settings live in the instance
    ``__dict__`` (which the YAML save path serialises) and can be
    overridden per instance.
    """

    # NOTE(review): presumably in Hz - confirm.
    sample_rate: int = 16000
    mono: bool = True
    normalize: bool = True
    trim_silence: bool = False
    # Upper bound in minutes, per the field name.
    max_duration_minutes: int = 60
@dataclass
class DiarizationConfig:
    """Speaker diarization configuration.

    Groups the VAD, segmentation, embedding and clustering settings with the
    diarization-level options. The class must be a dataclass: the nested
    sections are declared with ``field(default_factory=...)``, which only
    turns into a fresh per-instance sub-config when ``@dataclass`` processes
    the class.
    """

    # Nested sections; each instance gets its own freshly built sub-config.
    vad: VADConfig = field(default_factory=VADConfig)
    segmentation: SegmentationConfig = field(default_factory=SegmentationConfig)
    embedding: EmbeddingConfig = field(default_factory=EmbeddingConfig)
    clustering: ClusteringConfig = field(default_factory=ClusteringConfig)

    merge_gap_threshold: float = 0.5
    min_segment_duration: float = 0.3
    smooth_segments: bool = True

    # Embedding and collapse options
    use_speechbrain: bool = True
    allow_fallback: bool = False
    collapse_threshold: float = 0.15
    silhouette_collapse_threshold: float = 0.05
@dataclass
class ASRConfig:
    """ASR configuration.

    Declared as a dataclass so the settings live in the instance
    ``__dict__`` (which the YAML save path serialises) and can be
    overridden per instance.
    """

    model_id: str = "indonesian-nlp/wav2vec2-large-xlsr-indonesian"
    # Chunk and stride lengths in seconds, per the `_s` suffix.
    chunk_length_s: float = 30.0
    stride_length_s: float = 5.0
    batch_size: int = 4
    # Valid values: None (no timestamps), or 'char' / 'word' for CTC timestamp modes
    return_timestamps: Optional[str] = None
    capitalize_sentences: bool = True
    normalize_whitespace: bool = True
@dataclass
class SummarizationConfig:
    """Summarization configuration.

    The class must be a dataclass: the two keyword lists are declared with
    ``field(default_factory=...)`` so that every instance receives its own
    list rather than sharing one mutable default.
    """

    model_id: str = "indobenchmark/indobert-base-p1"
    sentence_model_id: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    num_sentences: int = 5
    min_sentence_length: int = 10
    max_sentence_length: int = 200
    position_weight: float = 0.1
    # Default keyword lists (Indonesian, with a few English terms).
    decision_keywords: List[str] = field(
        default_factory=lambda: [
            "diputuskan",
            "disepakati",
            "kesimpulan",
            "keputusan",
            "jadi",
            "maka",
            "sepakat",
            "setuju",
            "final",
        ]
    )
    action_keywords: List[str] = field(
        default_factory=lambda: [
            "akan",
            "harus",
            "perlu",
            "tolong",
            "mohon",
            "deadline",
            "target",
            "tugas",
            "tanggung jawab",
            "action item",
            "follow up",
            "tindak lanjut",
        ]
    )
@dataclass
class DocumentConfig:
    """Document generation configuration.

    Declared as a dataclass so the settings live in the instance
    ``__dict__`` (which the YAML save path serialises) and can be
    overridden per instance.
    """

    template: str = "default"
    # NOTE(review): font sizes are presumably in points - confirm.
    title_font_size: int = 18
    heading_font_size: int = 14
    body_font_size: int = 11
    font_family: str = "Calibri"
    include_timestamps: bool = True
    include_speaker_colors: bool = True
@dataclass
class EvaluationConfig:
    """Evaluation configuration.

    Declared as a dataclass so the settings live in the instance
    ``__dict__`` (which the YAML save path serialises) and can be
    overridden per instance.
    """

    # Options prefixed `wer_` / `der_` belong to the respective metric.
    wer_lowercase: bool = True
    wer_remove_punctuation: bool = True
    # NOTE(review): collar is presumably in seconds - confirm.
    der_collar: float = 0.25
    der_skip_overlap: bool = False
@dataclass
class PathsConfig:
    """Paths configuration.

    Declared as a dataclass so the paths live in the instance ``__dict__``
    (which the YAML save path serialises) and can be overridden per
    instance. All defaults are relative paths.
    """

    models_dir: str = "./models"
    audio_dir: str = "./data/audio"
    ground_truth_dir: str = "./data/ground_truth"
    output_dir: str = "./data/output"
    cache_dir: str = "./cache"
    logs_dir: str = "./logs"
@dataclass
class Config:
    """Main configuration class.

    Aggregates every section of the system configuration plus the global
    ``device`` and ``verbose`` switches. The class must be a dataclass: the
    sections are declared with ``field(default_factory=...)`` and the
    directory creation lives in ``__post_init__``, both of which only take
    effect when ``@dataclass`` generates ``__init__``.

    Side effect: constructing an instance creates every directory named in
    ``paths`` that does not exist yet.
    """

    audio: AudioConfig = field(default_factory=AudioConfig)
    diarization: DiarizationConfig = field(default_factory=DiarizationConfig)
    asr: ASRConfig = field(default_factory=ASRConfig)
    summarization: SummarizationConfig = field(default_factory=SummarizationConfig)
    document: DocumentConfig = field(default_factory=DocumentConfig)
    evaluation: EvaluationConfig = field(default_factory=EvaluationConfig)
    paths: PathsConfig = field(default_factory=PathsConfig)
    device: str = "auto"
    verbose: bool = True

    def __post_init__(self):
        """Create directories if they don't exist"""
        for path_attr in (
            "models_dir",
            "audio_dir",
            "ground_truth_dir",
            "output_dir",
            "cache_dir",
            "logs_dir",
        ):
            # exist_ok=True makes repeated construction harmless.
            os.makedirs(getattr(self.paths, path_attr), exist_ok=True)
def _apply_overrides(target: object, overrides: object) -> bool:
    """Copy matching entries of *overrides* onto *target*; return True if any was set."""
    # A section written as `audio:` with no body parses to None; skip
    # anything that is not a mapping instead of failing on `.items()`.
    if not isinstance(overrides, dict):
        return False

    changed = False
    for key, value in overrides.items():
        # YAML keys may be non-strings (e.g. ints), which hasattr() rejects;
        # underscore-prefixed names are never configuration settings.
        if not isinstance(key, str) or key.startswith("_"):
            continue
        # Unknown keys are ignored; methods must not be overwritten.
        if not hasattr(target, key) or callable(getattr(target, key)):
            continue
        setattr(target, key, value)
        changed = True
    return changed


def load_config(config_path: str = "config.yaml") -> Config:
    """
    Load configuration from YAML file.

    Starts from a default ``Config`` and overlays the values found in the
    file. A missing file, an empty file, or a file whose top level is not a
    mapping yields the defaults unchanged.

    Sections read: ``audio``, ``asr``, ``summarization``, ``document``,
    ``evaluation``, ``paths`` and ``hardware.device``. Keys that do not name
    an existing setting of the matching section are ignored. ``diarization``
    and ``verbose`` are not read from the file.

    Args:
        config_path: Path to config.yaml file

    Returns:
        Config object with loaded settings
    """
    config = Config()

    if not os.path.exists(config_path):
        return config

    with open(config_path, "r", encoding="utf-8") as f:
        yaml_config = yaml.safe_load(f)

    if not isinstance(yaml_config, dict):
        return config

    # `document` and `evaluation` are included so that a file written by
    # save_config() round-trips through load_config().
    for section in ("audio", "asr", "summarization", "document", "evaluation"):
        _apply_overrides(getattr(config, section), yaml_config.get(section))

    # Config() above only created the default directories; create the ones
    # configured in the file as well.
    if _apply_overrides(config.paths, yaml_config.get("paths")):
        config.__post_init__()

    # The device setting is stored under the `hardware` section.
    hardware = yaml_config.get("hardware")
    if isinstance(hardware, dict) and "device" in hardware:
        config.device = hardware["device"]

    return config
def save_config(config: Config, config_path: str = "config.yaml"):
    """
    Write the configuration to a YAML file.

    The file gets the ``audio``, ``asr``, ``summarization``, ``document``,
    ``evaluation`` and ``paths`` sections plus ``hardware.device``.
    Summarization attributes whose name ends in ``_keywords`` are left out.

    Args:
        config: Config object to save
        config_path: Path to save config.yaml
    """
    # Summarization section without the keyword lists.
    summarization_section = {}
    for name, value in vars(config.summarization).items():
        if name.endswith("_keywords"):
            continue
        summarization_section[name] = value

    # Each remaining section is the attribute dict of its config object.
    payload = {
        "audio": vars(config.audio),
        "asr": vars(config.asr),
        "summarization": summarization_section,
        "document": vars(config.document),
        "evaluation": vars(config.evaluation),
        "paths": vars(config.paths),
        "hardware": {"device": config.device},
    }

    with open(config_path, "w", encoding="utf-8") as stream:
        yaml.dump(payload, stream, default_flow_style=False, allow_unicode=True)