File size: 1,472 Bytes
7498f2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from __future__ import annotations

import logging
import re
from typing import List

from .text import extract_keywords_from_text


def _fallback_distill(text: str, max_sentences: int = 10) -> List[str]:
    # Very simple sentence ranking by keyword hits
    sentences = re.split(r"(?<=[.!?])\s+", text.strip())
    if not sentences:
        return []
    joined = " ".join(sentences)
    kws = set(extract_keywords_from_text(joined, top_k=50))
    scored = []
    for s in sentences:
        score = sum(1 for k in kws if k.lower() in s.lower())
        if len(s) > 20:
            scored.append((score, s))
    scored.sort(key=lambda x: x[0], reverse=True)
    return [s for _, s in scored[:max_sentences]]


def distill_text(text: str, max_points: int = 10) -> List[str]:
    """Condense *text* into at most ``max_points`` bullet strings.

    The optional ``langextract`` package is tried first. If it is missing,
    raises, or does not return a non-empty list, the keyword-ranking
    heuristic in ``_fallback_distill`` is used instead. Failures of the
    optional path are logged at DEBUG level and never propagate.

    Args:
        text: Raw text to condense. Empty or whitespace-only input yields
            an empty list.
        max_points: Upper bound on the number of bullets returned.
            Non-positive values yield an empty list.

    Returns:
        A list of at most ``max_points`` strings.
    """
    # Guard non-positive limits here so a negative value cannot leak into
    # slicing (``[:-n]``) and return an unintended subset.
    if not text or not text.strip() or max_points <= 0:
        return []

    log = logging.getLogger(__name__)

    try:
        # Optional dependency
        import langextract  # type: ignore
    except ImportError:
        # Not installed: the expected, quiet path.
        langextract = None
    except Exception:
        # Installed but broken on import; stay best-effort, leave a trace.
        log.debug("importing langextract failed; using fallback", exc_info=True)
        langextract = None

    if langextract is not None:
        # The API may differ; attempt a generic call, fallback otherwise.
        try:
            result = langextract.extract(text)  # type: ignore
            if isinstance(result, list):
                # Slice first so only the items actually kept are converted.
                bullets = [str(item) for item in result[:max_points]]
                if bullets:
                    return bullets
        except Exception:
            log.debug("langextract.extract failed; using fallback", exc_info=True)

    # Fallback heuristic
    return _fallback_distill(text, max_sentences=max_points)