Spaces:

kanslor821
/

espa_model

Sleeping

File size: 7,521 Bytes

0f34acb

"""

Основной модуль суммаризатора, объединяющий все компоненты

"""
import numpy as np
from audio_processing import preprocess_audio
from transcription import transcribe_audio, segment_sentences
from text_features import TextFeaturesExtractor
from paralinguistic_features import ParalinguisticFeaturesExtractor
from anomaly import compute_anomaly
from position_weight import compute_position_weight
from semantic_similarity import SemanticSimilarity
from iterative_selection import iterative_selection


class AudioSummarizer:
    """Extractive summarizer for audio recordings.

    Chains the project's pipeline stages: audio preprocessing, transcription,
    sentence segmentation, text features, paralinguistic features, anomaly
    score, position weight, semantic similarity and iterative selection.
    Progress and intermediate values are printed to stdout.
    """

    # Names of the paralinguistic feature columns, in the order they are read
    # from the feature matrix; ``beta`` must hold one weight per column.
    _PARA_FEATURES = ("pitch_std", "volume_std", "pause_duration", "tempo_std")

    # Horizontal rule used for the section banners.
    _RULE = "=" * 60

    def __init__(self,
                 beta: list = None,
                 lambd: float = 0.5,
                 delta: float = 0.1,
                 theta: float = 0.6,
                 alpha: float = 0.7,
                 L: int = 2):
        """Store the scoring hyper-parameters and build the text components.

        Args:
            beta: feature weights [pitch_std, volume_std, pause_duration,
                tempo_std]; defaults to [0.2, 0.25, 0.3, -0.15].
            lambd: weight of the anomaly term in the score.
            delta: strength of the positional effect.
            theta: redundancy suppression coefficient used during selection.
            alpha: balance between IDF and POS in the similarity measure.
            L: radius of the local neighbourhood used for the anomaly.

        Raises:
            ValueError: if ``beta`` does not hold exactly one weight per
                paralinguistic feature.
        """
        if beta is None:
            beta = [0.2, 0.25, 0.3, -0.15]
        elif len(beta) != len(self._PARA_FEATURES):
            # Fail fast: a wrong-sized vector would otherwise only surface as
            # a shape-mismatch ValueError in the matrix product inside
            # summarize(), after preprocessing and transcription have run.
            raise ValueError(
                f"beta must contain {len(self._PARA_FEATURES)} weights "
                f"({', '.join(self._PARA_FEATURES)}), got {len(beta)}"
            )
        self.beta = beta
        self.lambd = lambd
        self.delta = delta
        self.theta = theta
        self.alpha = alpha
        self.L = L

        self.text_extractor = TextFeaturesExtractor()
        self.similarity_calculator = SemanticSimilarity(alpha=alpha)

    @classmethod
    def _print_header(cls, title: str, leading_blank: bool = True) -> None:
        """Print a section banner: rule, title, rule."""
        print("\n" + cls._RULE if leading_blank else cls._RULE)
        print(title)
        print(cls._RULE)

    @staticmethod
    def _filter_short_sentences(sentences, min_words: int) -> list:
        """Return the sentences with at least ``min_words`` words, logging the rest."""
        kept = []
        for sent in sentences:
            word_count = len(sent['text'].split())
            if word_count >= min_words:
                kept.append(sent)
            else:
                print(f"  Игнорируется короткое предложение ({word_count} слов): \"{sent['text']}\"")
        return kept

    def _empty_result(self) -> dict:
        """Build the result returned when no sentence survives filtering.

        Carries the same keys as a successful result (with empty values) plus
        ``error``, so callers can index the result without guarding every key.
        """
        return {
            "summary": [],
            "full_text": "",
            "error": "No sentences after filtering",
            "scores": np.zeros(0),
            "selected_indices": [],
            "sentences": [],
            "para_features": np.zeros((0, len(self.beta))),
            "T_base": np.zeros(0),
            "modulator": np.zeros(0),
            "anomaly": np.zeros(0),
            "position_weights": np.zeros(0),
        }

    def summarize(self, audio_path: str, k: int = 5, min_words: int = 4) -> dict:
        """Run the full summarization pipeline on one audio file.

        Args:
            audio_path: path to the audio file.
            k: number of sentences requested for the summary.
            min_words: minimum number of whitespace-separated words a sentence
                needs; shorter sentences are dropped before scoring.

        Returns:
            A dict with the keys ``summary``, ``full_text``, ``scores``,
            ``selected_indices``, ``sentences``, ``para_features``,
            ``T_base``, ``modulator``, ``anomaly`` and ``position_weights``.
            When no sentence survives the length filter the same keys are
            present with empty values and an extra ``error`` key is set.
        """
        self._print_header("ШАГ 1: Предобработка аудио", leading_blank=False)
        processed_audio = preprocess_audio(audio_path)

        self._print_header("ШАГ 2: Транскрибация (Whisper)")
        transcription = transcribe_audio(processed_audio)

        self._print_header("ШАГ 3: Сегментация на предложения")
        segmented = segment_sentences(transcription)

        # Drop sentences that are too short to be useful in a summary.
        original_count = len(segmented)
        sentences = self._filter_short_sentences(segmented, min_words)
        print(
            f"  Обнаружено предложений: {original_count}, отфильтровано коротких: {original_count - len(sentences)}, осталось: {len(sentences)}")

        # Nothing left to score: report it and return an empty result.
        if not sentences:
            print("  ОШИБКА: Не осталось предложений после фильтрации!")
            return self._empty_result()

        sentences_text = [s['text'] for s in sentences]
        N = len(sentences)
        print(f"  Обнаружено предложений: {N}")

        self._print_header("ШАГ 4: Извлечение текстовых признаков")
        # Only T_base feeds the score; bm25 and textrank are returned by the
        # extractor but not used further here.
        bm25, textrank, T_base = self.text_extractor.extract(sentences_text)

        self._print_header("ШАГ 5: Извлечение паралингвистических признаков")
        para_extractor = ParalinguisticFeaturesExtractor(processed_audio)
        # Presumably one row per sentence and one column per entry of
        # _PARA_FEATURES -- confirm against the extractor.
        para_matrix = para_extractor.extract(sentences)

        self._print_header("ШАГ 6: Вычисление аномальной важности")
        anomaly = compute_anomaly(para_matrix, L=self.L)

        self._print_header("ШАГ 7: Вычисление позиционного веса")
        position_weights = compute_position_weight(N, delta=self.delta)

        self._print_header("ШАГ 8: Расчёт итоговой оценки Score")
        # Score = T_base * exp(features . beta) * (1 + lambd * anomaly) * position weight
        para_sum = para_matrix @ np.array(self.beta)
        modulator = np.exp(para_sum)
        scores = T_base * modulator * (1 + self.lambd * anomaly) * position_weights

        for i, score in enumerate(scores):
            text = sentences_text[i]
            print(f"\n  S{i + 1}:")
            print(f"    Score = {score:.4f}")
            # Long sentences are truncated to 150 characters in the log only.
            print(f"    Текст: {text[:150]}{'...' if len(text) > 150 else ''}")
            print(
                f"    T_base = {T_base[i]:.4f}, M = {modulator[i]:.4f}, A = {anomaly[i]:.4f}, π = {position_weights[i]:.4f}")
            print(f"    Паралингвистические признаки:")
            for col, feature_name in enumerate(self._PARA_FEATURES):
                print(f"      {feature_name} = {para_matrix[i][col]:.4f}")

        self._print_header("ШАГ 9: Вычисление семантического сходства")
        similarity_matrix = self.similarity_calculator.compute_matrix(sentences_text)

        self._print_header(f"ШАГ 10: Итеративный отбор (k={k}, θ={self.theta})")
        selected_indices = iterative_selection(scores, similarity_matrix, k, theta=self.theta)

        self._print_header("ШАГ 11: Построение реферата")
        # Sentences appear in the order returned by iterative_selection.
        summary = [sentences_text[i] for i in selected_indices]

        self._print_header("ПОЛНЫЙ РАСШИФРОВАННЫЙ ТЕКСТ")
        full_text = " ".join(sentences_text)
        print(full_text)

        self._print_header("ИТОГОВЫЙ РЕФЕРАТ")
        for i, sent in enumerate(summary, 1):
            print(f"{i}. {sent}")

        return {
            "summary": summary,
            "full_text": full_text,
            "scores": scores,
            "selected_indices": selected_indices,
            "sentences": sentences_text,
            "para_features": para_matrix,
            "T_base": T_base,
            "modulator": modulator,
            "anomaly": anomaly,
            "position_weights": position_weights
        }