File size: 1,303 Bytes
694e586
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import math
from typing import Dict, List, Any


def generate_summary(
    sentences_data: List[Dict[str, Any]],
    word_weights: Dict[str, int],
    compression_ratio: float = 0.1,
) -> List[Dict[str, Any]]:
    """Build an extractive summary from the highest-scoring sentences.

    Each sentence is scored as::

        score = sum(weights of its unique lemmas) / sqrt(number of tokens)

    The top-scoring sentences are selected and returned in ascending
    ``"index"`` order.

    Args:
        sentences_data: One dict per sentence. The keys read here are
            ``"lemmas_clean"`` (lemmas to score), ``"tokens"`` (its length
            is used for the penalty), ``"index"`` and ``"raw_text"``.
            Missing keys, or keys holding ``None``, are treated as empty
            (``"index"`` defaults to 0, ``"raw_text"`` to ``""``).
        word_weights: Weight per lemma; lemmas not in the mapping count as 0.
        compression_ratio: Fraction of sentences to keep. Clamped to the
            range [0.05, 0.8].

    Returns:
        A list of dicts with keys ``"index"``, ``"text"`` and ``"score"``
        (rounded to 3 decimals). Empty when ``sentences_data`` is empty;
        otherwise it contains at least one sentence.
    """
    if not sentences_data:
        return []

    compression_ratio = max(0.05, min(0.8, compression_ratio))

    # Pairs of (exact score, output entry). The exact score is kept apart
    # from the rounded one so that display rounding cannot create
    # artificial ties during ranking.
    scored = []
    for sent in sentences_data:
        # "or ()" tolerates a key that is present but holds None.
        lemmas = set(sent.get("lemmas_clean") or ())
        if lemmas:
            weight_sum = float(
                sum(word_weights.get(lemma, 0) for lemma in lemmas)
            )
            # max(1, ...) guards against division by zero for a sentence
            # that has lemmas but no tokens.
            length_penalty = math.sqrt(max(1, len(sent.get("tokens") or ())))
            score = weight_sum / length_penalty
        else:
            score = 0.0

        scored.append(
            (
                score,
                {
                    "index": sent.get("index", 0),
                    "text": sent.get("raw_text", ""),
                    "score": round(score, 3),
                },
            )
        )

    # The sort is stable, so sentences with exactly equal scores keep
    # their input order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    take_n = max(1, int(round(len(scored) * compression_ratio)))

    chosen = [entry for _, entry in scored[:take_n]]
    chosen.sort(key=lambda entry: entry["index"])
    return chosen