Spaces:
Sleeping
Sleeping
| import re, numpy as np | |
| from typing import List | |
| from embedder import embed_texts | |
| def split_sentences(text: str) -> List[str]: | |
| sents = re.split(r'(?<=[\.\!\?])\s+', text.strip()) | |
| return [s.strip() for s in sents if s.strip()] | |
| def mmr_summarize(text: str, max_sentences: int = 4, diversity: float = 0.6) -> str: | |
| sents = split_sentences(text) | |
| if not sents: return text.strip() | |
| if len(sents) <= max_sentences: return " ".join(sents) | |
| embs = embed_texts(sents) | |
| embs = np.array(embs) | |
| centroid = embs.mean(axis=0) | |
| centroid = centroid / (np.linalg.norm(centroid) + 1e-8) | |
| selected = [int(np.argmax(embs @ centroid))] | |
| while len(selected) < max_sentences: | |
| best, idx = -1e9, None | |
| for i in range(len(sents)): | |
| if i in selected: continue | |
| rel = float(embs[i] @ centroid) | |
| red = max(float(embs[i] @ embs[j]) for j in selected) if selected else 0.0 | |
| score = diversity*rel - (1-diversity)*red | |
| if score > best: | |
| best, idx = score, i | |
| if idx is None: break | |
| selected.append(idx) | |
| selected.sort() | |
| return " ".join(sents[i] for i in selected) | |