Spaces:

kanslor821
/

espa_model

Sleeping

App Files Files Community

espa_model / summarizer.py

kanslor821

Upload 10 files

0f34acb verified about 2 months ago

raw

history blame contribute delete

7.52 kB

	"""
	Основной модуль суммаризатора, объединяющий все компоненты
	"""
	from typing import Optional, Sequence

	import numpy as np

	from audio_processing import preprocess_audio
	from transcription import transcribe_audio, segment_sentences
	from text_features import TextFeaturesExtractor
	from paralinguistic_features import ParalinguisticFeaturesExtractor
	from anomaly import compute_anomaly
	from position_weight import compute_position_weight
	from semantic_similarity import SemanticSimilarity
	from iterative_selection import iterative_selection


	class AudioSummarizer:
	    """Main class for summarizing audio recordings.

	    Holds the scoring hyper-parameters plus two reusable components (the
	    text feature extractor and the semantic similarity calculator).
	    ``summarize`` runs the complete pipeline for a single audio file and
	    prints a progress report for every step to stdout.
	    """

	    # Width of the "=" rule printed above and below every step title.
	    _RULE_WIDTH = 60

	    # Labels for the first four columns of the paralinguistic matrix, in the
	    # order the per-sentence report prints them.
	    _FEATURE_NAMES = ("pitch_std", "volume_std", "pause_duration", "tempo_std")

	    def __init__(self,
	                 beta: Optional[Sequence[float]] = None,
	                 lambd: float = 0.5,
	                 delta: float = 0.1,
	                 theta: float = 0.6,
	                 alpha: float = 0.7,
	                 L: int = 2):
	        """Store the hyper-parameters and build the reusable components.

	        Args:
	            beta: feature weights [pitch_std, volume_std, pause_duration,
	                tempo_std]; ``None`` selects [0.2, 0.25, 0.3, -0.15].
	            lambd: weight of the anomaly term.
	            delta: strength of the positional effect.
	            theta: redundancy suppression coefficient.
	            alpha: balance between IDF and POS in the similarity measure.
	            L: radius of the local neighbourhood used for the anomaly.
	        """
	        self.beta = beta if beta is not None else [0.2, 0.25, 0.3, -0.15]
	        self.lambd = lambd
	        self.delta = delta
	        self.theta = theta
	        self.alpha = alpha
	        self.L = L

	        self.text_extractor = TextFeaturesExtractor()
	        self.similarity_calculator = SemanticSimilarity(alpha=alpha)

	    @staticmethod
	    def _print_banner(title: str, leading_blank: bool = True) -> None:
	        """Print a step title framed by two "=" rules."""
	        rule = "=" * AudioSummarizer._RULE_WIDTH
	        # Every banner except the very first is separated from the previous
	        # step's output by an empty line.
	        print("\n" + rule if leading_blank else rule)
	        print(title)
	        print(rule)

	    @staticmethod
	    def _filter_short_sentences(sentences, min_words: int) -> list:
	        """Return the sentences with at least ``min_words`` words, logging the rest."""
	        kept = []
	        for sent in sentences:
	            # Words are counted by splitting the sentence text on whitespace.
	            word_count = len(sent['text'].split())
	            if word_count >= min_words:
	                kept.append(sent)
	            else:
	                print(f" Игнорируется короткое предложение ({word_count} слов): \"{sent['text']}\"")
	        return kept

	    @staticmethod
	    def _report_scores(sentences_text, scores, T_base, modulator, anomaly,
	                       position_weights, para_matrix) -> None:
	        """Print the score breakdown and paralinguistic features per sentence."""
	        for i, score in enumerate(scores):
	            text = sentences_text[i]
	            # Long sentences are cut to 150 characters and marked with "...".
	            preview = text[:150] + ('...' if len(text) > 150 else '')
	            print(f"\n S{i + 1}:")
	            print(f" Score = {score:.4f}")
	            print(f" Текст: {preview}")
	            print(
	                f" T_base = {T_base[i]:.4f}, M = {modulator[i]:.4f}, A = {anomaly[i]:.4f}, π = {position_weights[i]:.4f}")
	            print(" Паралингвистические признаки:")
	            for column, feature in enumerate(AudioSummarizer._FEATURE_NAMES):
	                print(f" {feature} = {para_matrix[i][column]:.4f}")

	    def _empty_result(self, error: str) -> dict:
	        """Build a result with every regular key present but empty, plus ``error``."""
	        return {
	            "summary": [],
	            "full_text": "",
	            "scores": np.array([]),
	            "selected_indices": [],
	            "sentences": [],
	            # Zero rows; the width follows ``beta`` because scoring
	            # multiplies the feature matrix by ``beta``.
	            "para_features": np.empty((0, len(self.beta))),
	            "T_base": np.array([]),
	            "modulator": np.array([]),
	            "anomaly": np.array([]),
	            "position_weights": np.array([]),
	            "error": error,
	        }

	    def summarize(self, audio_path: str, k: int = 5, min_words: int = 4) -> dict:
	        """Run the full summarization pipeline on one audio file.

	        Steps: preprocess the audio, transcribe it, segment the transcript
	        into sentences, drop sentences shorter than ``min_words`` words,
	        extract text and paralinguistic features, then score each sentence as

	            Score = T_base * exp(para_matrix @ beta)
	                    * (1 + lambd * anomaly) * position_weights

	        and pick sentences with ``iterative_selection``. Progress and
	        intermediate values are printed to stdout.

	        Args:
	            audio_path: path to the audio file.
	            k: number of sentences in the summary (passed to
	                ``iterative_selection``).
	            min_words: minimum number of words per sentence; shorter
	                sentences are ignored.

	        Returns:
	            A dict with the keys ``summary`` (selected sentence texts, in
	            the order ``iterative_selection`` returned their indices),
	            ``full_text`` (kept sentences joined with spaces), ``scores``,
	            ``selected_indices``, ``sentences``, ``para_features``,
	            ``T_base``, ``modulator``, ``anomaly`` and ``position_weights``.
	            When no sentence survives the length filter, the same keys are
	            returned with empty values and an additional ``error`` message.
	        """
	        self._print_banner("ШАГ 1: Предобработка аудио", leading_blank=False)
	        processed_audio = preprocess_audio(audio_path)

	        self._print_banner("ШАГ 2: Транскрибация (Whisper)")
	        transcription = transcribe_audio(processed_audio)

	        self._print_banner("ШАГ 3: Сегментация на предложения")
	        segmented = segment_sentences(transcription)

	        # Drop sentences that are too short to be useful in a summary.
	        original_count = len(segmented)
	        sentences = self._filter_short_sentences(segmented, min_words)
	        print(
	            f" Обнаружено предложений: {original_count}, отфильтровано коротких: {original_count - len(sentences)}, осталось: {len(sentences)}")

	        # Nothing left to score: return an empty result with the same keys
	        # as a successful run so callers can read them unconditionally.
	        if not sentences:
	            print(" ОШИБКА: Не осталось предложений после фильтрации!")
	            return self._empty_result("No sentences after filtering")

	        sentences_text = [s['text'] for s in sentences]
	        N = len(sentences)
	        print(f" Обнаружено предложений: {N}")

	        self._print_banner("ШАГ 4: Извлечение текстовых признаков")
	        # The extractor returns three values; only the combined base text
	        # score is used below.
	        _, _, T_base = self.text_extractor.extract(sentences_text)

	        self._print_banner("ШАГ 5: Извлечение паралингвистических признаков")
	        para_extractor = ParalinguisticFeaturesExtractor(processed_audio)
	        # The full sentence records (not just their text) go to the extractor.
	        para_matrix = para_extractor.extract(sentences)

	        self._print_banner("ШАГ 6: Вычисление аномальной важности")
	        anomaly = compute_anomaly(para_matrix, L=self.L)

	        self._print_banner("ШАГ 7: Вычисление позиционного веса")
	        position_weights = compute_position_weight(N, delta=self.delta)

	        self._print_banner("ШАГ 8: Расчёт итоговой оценки Score")
	        # Weighted sum of the paralinguistic features, exponentiated so it
	        # acts as a positive multiplicative modulator of the text score.
	        para_sum = para_matrix @ np.array(self.beta)
	        modulator = np.exp(para_sum)
	        scores = T_base * modulator * (1 + self.lambd * anomaly) * position_weights
	        self._report_scores(sentences_text, scores, T_base, modulator, anomaly,
	                            position_weights, para_matrix)

	        self._print_banner("ШАГ 9: Вычисление семантического сходства")
	        similarity_matrix = self.similarity_calculator.compute_matrix(sentences_text)

	        self._print_banner(f"ШАГ 10: Итеративный отбор (k={k}, θ={self.theta})")
	        selected_indices = iterative_selection(scores, similarity_matrix, k, theta=self.theta)

	        self._print_banner("ШАГ 11: Построение реферата")
	        summary = [sentences_text[i] for i in selected_indices]

	        self._print_banner("ПОЛНЫЙ РАСШИФРОВАННЫЙ ТЕКСТ")
	        full_text = " ".join(sentences_text)
	        print(full_text)

	        self._print_banner("ИТОГОВЫЙ РЕФЕРАТ")
	        for number, sent in enumerate(summary, 1):
	            print(f"{number}. {sent}")

	        return {
	            "summary": summary,
	            "full_text": full_text,
	            "scores": scores,
	            "selected_indices": selected_indices,
	            "sentences": sentences_text,
	            "para_features": para_matrix,
	            "T_base": T_base,
	            "modulator": modulator,
	            "anomaly": anomaly,
	            "position_weights": position_weights,
	        }