| |
| """ |
| Generate one-sentence summaries using the distilled Qwen3-0.6B model. |
| |
| Uses llama-cpp-python for CPU inference. ~3-5s per summary. |
| |
| Usage: |
| # Single text |
| python3 summarize.py "Eric wants to add dark mode to the webui-next app." |
| |
| # From stdin |
| echo "Some text to summarize" | python3 summarize.py |
| |
| # Batch from JSON (list of {"id": ..., "text": ...}) |
| python3 summarize.py --batch input.json --output results.json |
| |
| # As a library |
| from summarize import Summarizer |
| s = Summarizer() |
| print(s.summarize("Some text here")) |
| """ |
| import argparse |
| import json |
| import os |
| import sys |
| import time |
| from pathlib import Path |
|
|
# Paths are resolved relative to this script's directory.
PROJECT_DIR = Path(__file__).parent
# Quantized (q8_0) GGUF export of the distilled Qwen3-0.6B summarizer.
MODEL_PATH = str(PROJECT_DIR / "models" / "qwen3-0.6b-summarizer-q8_0.gguf")
# Single-sentence summarization prompt; {text} is filled with the (truncated) input.
PROMPT_TEMPLATE = "Summarize in one sentence:\n{text}\n\nSummary:"


class Summarizer:
    """CPU-based one-sentence summarizer using distilled Qwen3-0.6B."""

    def __init__(self, model_path=MODEL_PATH, n_threads=8, n_ctx=512):
        # Imported lazily so importing this module does not require llama_cpp
        # (e.g. for tooling that only inspects the API).
        from llama_cpp import Llama
        self.llm = Llama(model_path, n_ctx=n_ctx, n_threads=n_threads, verbose=False)

    def summarize(self, text, max_tokens=80, temperature=0.3, max_input_chars=2000):
        """Generate a one-sentence summary of the input text.

        Args:
            text: Raw text to summarize (markdown, chat messages, task descriptions, etc.)
            max_tokens: Maximum output tokens (summaries are typically 20-40 tokens)
            temperature: Sampling temperature (0.3 = mostly deterministic)
            max_input_chars: Truncate input beyond this length

        Returns:
            One-sentence summary string
        """
        prompt = PROMPT_TEMPLATE.format(text=text[:max_input_chars])
        out = self.llm(prompt, max_tokens=max_tokens, temperature=temperature,
                       stop=["\n", "<|im_end|>", "<|endoftext|>"])
        return out["choices"][0]["text"].strip()

    def summarize_batch(self, items, progress=True):
        """Summarize a list of {"id": ..., "text": ...} dicts.

        Args:
            items: List of dicts; each needs a "text" key. "id" is optional and
                falls back to the item's position in the list.
            progress: If True, print a rate/ETA line to stderr every 10 items.

        Returns:
            List of {"id": ..., "text": ..., "summary": ..., "time_s": ...},
            where "text" is truncated to 200 chars and "time_s" is the
            per-item wall time rounded to 2 decimals.
        """
        results = []
        # Keep an UNROUNDED running total for the rate estimate. The previous
        # code divided by sum(r["time_s"]), but those values are rounded to 2
        # decimals and can all be 0.0 for fast items -> ZeroDivisionError.
        total_s = 0.0
        for i, item in enumerate(items):
            t0 = time.time()
            summary = self.summarize(item["text"])
            elapsed = time.time() - t0
            total_s += elapsed
            results.append({
                "id": item.get("id", i),
                "text": item["text"][:200],  # truncate to keep output compact
                "summary": summary,
                "time_s": round(elapsed, 2),
            })
            if progress and (i + 1) % 10 == 0 and total_s > 0:
                rate = (i + 1) / total_s
                eta = (len(items) - i - 1) / rate
                print(f" [{i+1}/{len(items)}] {rate:.1f}/s, ETA {eta:.0f}s",
                      file=sys.stderr, flush=True)
        return results
|
|
|
|
def main():
    """CLI entry point: dispatch between batch, single-text, and stdin modes."""
    ap = argparse.ArgumentParser(description="Generate one-sentence summaries")
    ap.add_argument("text", nargs="?", help="Text to summarize")
    ap.add_argument("--batch", help="JSON file with list of {id, text} objects")
    ap.add_argument("--output", help="Output JSON file for batch mode")
    ap.add_argument("--threads", type=int, default=8)
    ap.add_argument("--model", default=MODEL_PATH)
    args = ap.parse_args()

    # Model load is the slow part; report it on stderr so stdout stays clean.
    print("Loading summarizer...", file=sys.stderr, flush=True)
    start = time.time()
    summarizer = Summarizer(model_path=args.model, n_threads=args.threads)
    print(f"Ready ({time.time()-start:.1f}s)", file=sys.stderr, flush=True)

    if args.batch:
        with open(args.batch) as fh:
            items = json.load(fh)
        print(f"Summarizing {len(items)} items...", file=sys.stderr, flush=True)
        results = summarizer.summarize_batch(items)
        if not args.output:
            # No output file: emit one JSON object per line on stdout.
            for record in results:
                print(json.dumps(record))
        else:
            with open(args.output, "w") as fh:
                json.dump(results, fh, indent=2)
            print(f"Results saved to {args.output}", file=sys.stderr)
        return

    if args.text:
        print(summarizer.summarize(args.text))
        return

    if not sys.stdin.isatty():
        # Piped input; silently produce nothing if stdin was empty.
        stdin_text = sys.stdin.read().strip()
        if stdin_text:
            print(summarizer.summarize(stdin_text))
        return

    ap.print_help()
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|