Spaces:
Sleeping
Sleeping
| """Abstract base class for summarization tasks.""" | |
| from abc import ABC, abstractmethod | |
| from typing import Dict, Any, Optional | |
class BaseSummarizationTask(ABC):
    """Base class for all summarization tasks.

    Each task loads a dataset, samples an example, and constructs
    the episode data (truncated context, question, ground truth answers).

    Subclasses must implement ``get_sample``. The prompt builders and
    ``infer_category`` provide shared default behavior.
    """

    name: str = "base"
    max_steps: int = 2  # default: summarize + answer

    # Ordered (category, keyword prefixes) pairs. Order matters: the first
    # category with a matching keyword wins.
    _CATEGORY_KEYWORDS = (
        ("history", ("who", "born", "war", "empire", "king", "queen")),
        ("geography", ("city", "country", "river", "mountain", "where")),
        ("science", ("process", "chemical", "cell", "atom", "science")),
        ("software", ("programming", "software", "language", "python", "code")),
    )

    def infer_category(self, question: str) -> str:
        """Infer a coarse document category for richer benchmark metadata.

        The question is lower-cased and split into words; a category matches
        when any word *starts with* one of its keywords. Anchoring at the
        start of a word keeps inflected forms ("wars", "kingdom", "atomic")
        while preventing hits buried inside unrelated words, e.g. "war" in
        "software" or "city" in "capacity".

        Args:
            question: The question text to classify.

        Returns:
            One of "history", "geography", "science", "software", or
            "general" when no keyword matches.
        """
        # Replace every non-alphanumeric character with a space so that
        # punctuation and apostrophes act as word separators.
        normalized = "".join(
            ch if ch.isalnum() else " " for ch in question.lower()
        )
        words = normalized.split()
        for category, prefixes in self._CATEGORY_KEYWORDS:
            # str.startswith accepts a tuple of candidate prefixes.
            if any(word.startswith(prefixes) for word in words):
                return category
        return "general"

    @abstractmethod
    def get_sample(self, seed: Optional[int] = None) -> Dict[str, Any]:
        """Return a single episode sample.

        Must be implemented by every concrete task.

        Args:
            seed: Optional integer supplied by the caller. How it influences
                sampling is up to the subclass (presumably reproducibility;
                confirm against the concrete tasks).

        Returns a dict with:
            - context: str              Full context text
            - truncated_context: str    Visible portion of context
            - truncation_ratio: float   Fraction shown (e.g. 0.7)
            - category: str             Coarse domain/category
            - source_type: str          Source style (encyclopedic, report, paper, etc.)
            - question: str             The question to answer
            - answer: str               Primary ground-truth answer
            - answer_list: list[str]    All valid answers (for F1 scoring)

        Raises:
            NotImplementedError: If a subclass delegates here via ``super()``.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement get_sample()"
        )

    def get_system_prompt(self) -> str:
        """Return the system prompt that frames the summarization role."""
        return (
            "You are preparing a compact working memory for another assistant that will "
            "not get to read the original document. Summaries must be concise, faithful, "
            "and dense with facts that are likely to matter for later question answering."
        )

    def get_summarize_prompt(self, truncated_context: str, truncation_ratio: float) -> str:
        """Build the prompt asking for a summary of the visible excerpt.

        Args:
            truncated_context: The portion of the document shown to the model.
            truncation_ratio: Fraction of the full text that is visible
                (e.g. 0.7); reported in the prompt as a whole percentage.

        Returns:
            The prompt text containing the excerpt and the instructions.
        """
        # round() rather than int(): float error makes 0.29 * 100 evaluate to
        # 28.999999999999996, which plain truncation would report as 28%.
        pct = int(round(truncation_ratio * 100))
        return (
            f"Here is a document excerpt (you are seeing approximately {pct}% of the full text):\n\n"
            f"{truncated_context}\n\n"
            "Write a compact summary for downstream use. Preserve concrete details such as "
            "names, dates, quantities, entities, causal links, and definitions that a later "
            "assistant might need in order to answer factual questions without the source text."
        )

    def get_answer_prompt(self, question: str) -> str:
        """Build the prompt asking for an answer based only on the summary.

        Args:
            question: The question to embed in the prompt.

        Returns:
            The prompt text containing the question and answering rules.
        """
        return (
            "Based on your summary of the document, please answer the following question:\n\n"
            f"Question: {question}\n\n"
            "Answer using only information that was preserved in the summary. "
            "Provide a direct, concise answer. If the answer is a specific name, number, or phrase, give just that."
        )