Spaces:
Sleeping
Sleeping
| import math | |
| from typing import Dict, List, Any | |
def generate_summary(
    sentences_data: List[Dict[str, Any]],
    word_weights: Dict[str, int],
    compression_ratio: float = 0.1,
) -> List[Dict[str, Any]]:
    """Build an extractive summary from the highest-scoring sentences.

    Each sentence is scored as::

        score = sum(weights of its unique lemmas) / sqrt(sentence length)

    where the sentence length is the number of items under ``"tokens"``
    (treated as at least 1). The top-scoring sentences are selected and
    returned ordered by their ``"index"``.

    Args:
        sentences_data: Sentence records. The keys read are
            ``"lemmas_clean"`` (lemmas; duplicates are counted once),
            ``"tokens"`` (only its length is used), ``"raw_text"`` and
            ``"index"``. Every key is optional.
        word_weights: Mapping from lemma to weight. Lemmas missing from
            the mapping contribute 0.
        compression_ratio: Fraction of sentences to keep. Clamped to the
            range [0.05, 0.8].

    Returns:
        A list of dicts with keys ``"index"``, ``"text"`` and ``"score"``
        (rounded to 3 decimals), sorted by ``"index"``. Empty input yields
        an empty list; otherwise at least one sentence is returned.
    """
    if not sentences_data:
        return []

    compression_ratio = max(0.05, min(0.8, compression_ratio))

    ranked: List[Dict[str, Any]] = []
    for position, sent in enumerate(sentences_data):
        unique_lemmas = set(sent.get("lemmas_clean", []))
        if unique_lemmas:
            weight_sum = float(
                sum(word_weights.get(lemma, 0) for lemma in unique_lemmas)
            )
            # Dividing by sqrt(length) damps the advantage of long sentences.
            length_penalty = math.sqrt(max(1, len(sent.get("tokens", []))))
            score = weight_sum / length_penalty
        else:
            score = 0.0

        ranked.append(
            {
                # Fall back to the position in the input when the record
                # carries no explicit index; a constant default would make
                # the final ordering step below a no-op.
                "index": sent.get("index", position),
                "text": sent.get("raw_text", ""),
                "score": round(score, 3),
            }
        )

    # The sort is stable, so sentences whose rounded scores are equal keep
    # their input order.
    ranked.sort(key=lambda item: item["score"], reverse=True)

    take_n = max(1, int(round(len(ranked) * compression_ratio)))
    chosen = ranked[:take_n]

    # Present the selected sentences in index order rather than score order.
    chosen.sort(key=lambda item: item["index"])
    return chosen