# Provenance (Hugging Face upload metadata, kept as comments so the file parses):
# ericflo's picture
# Upload training/summarize.py with huggingface_hub
# 4915429 verified
#!/usr/bin/env python3
"""
Generate one-sentence summaries using the distilled Qwen3-0.6B model.
Uses llama-cpp-python for CPU inference. ~3-5s per summary.
Usage:
# Single text
python3 summarize.py "Eric wants to add dark mode to the webui-next app."
# From stdin
echo "Some text to summarize" | python3 summarize.py
# Batch from JSON (list of {"id": ..., "text": ...})
python3 summarize.py --batch input.json --output results.json
# As a library
from summarize import Summarizer
s = Summarizer()
print(s.summarize("Some text here"))
"""
import argparse
import json
import os
import sys
import time
from pathlib import Path
# Directory containing this script; the model file is expected under ./models beside it.
PROJECT_DIR = Path(__file__).parent
# Default checkpoint: distilled Qwen3-0.6B summarizer in 8-bit-quantized GGUF form.
MODEL_PATH = str(PROJECT_DIR / "models" / "qwen3-0.6b-summarizer-q8_0.gguf")
# Completion-style prompt; {text} is replaced with the (possibly truncated) input.
PROMPT_TEMPLATE = "Summarize in one sentence:\n{text}\n\nSummary:"
class Summarizer:
    """CPU-based one-sentence summarizer using distilled Qwen3-0.6B.

    Wraps a llama-cpp-python model loaded from a local GGUF file and runs
    completion-style inference on the CPU (~3-5 s per summary, per the
    module docs).
    """

    def __init__(self, model_path=MODEL_PATH, n_threads=8, n_ctx=512):
        """Load the GGUF model.

        Args:
            model_path: Path to the GGUF model file.
            n_threads: Number of CPU threads for inference.
            n_ctx: Context window size in tokens (shared by prompt + output).
        """
        # Imported lazily so this module can be imported (e.g. by tooling or
        # for --help) without llama-cpp-python installed.
        from llama_cpp import Llama
        self.llm = Llama(model_path, n_ctx=n_ctx, n_threads=n_threads, verbose=False)

    def summarize(self, text, max_tokens=80, temperature=0.3, max_input_chars=2000):
        """Generate a one-sentence summary of the input text.

        Args:
            text: Raw text to summarize (markdown, chat messages, task
                descriptions, etc.)
            max_tokens: Maximum output tokens (summaries are typically
                20-40 tokens)
            temperature: Sampling temperature (0.3 = mostly deterministic)
            max_input_chars: Truncate input beyond this length so the prompt
                fits inside the model's context window

        Returns:
            One-sentence summary string
        """
        prompt = PROMPT_TEMPLATE.format(text=text[:max_input_chars])
        # Stop at the first newline or chat end-of-turn marker so the model
        # emits exactly one line.
        out = self.llm(prompt, max_tokens=max_tokens, temperature=temperature,
                       stop=["\n", "<|im_end|>", "<|endoftext|>"])
        return out["choices"][0]["text"].strip()

    def summarize_batch(self, items, progress=True):
        """Summarize a list of {"id": ..., "text": ...} dicts.

        Args:
            items: Sequence of dicts with a required "text" key and an
                optional "id" key (defaults to the item's index).
            progress: When True, print a rate/ETA line to stderr every
                10 items.

        Returns:
            List of {"id": ..., "text": ..., "summary": ..., "time_s": ...}
            where "text" is truncated to 200 chars and "time_s" is per-item
            wall-clock seconds rounded to 2 decimals.
        """
        results = []
        for i, item in enumerate(items):
            t0 = time.time()
            summary = self.summarize(item["text"])
            elapsed = time.time() - t0
            results.append({
                "id": item.get("id", i),
                "text": item["text"][:200],
                "summary": summary,
                "time_s": round(elapsed, 2),
            })
            if progress and (i + 1) % 10 == 0:
                # Rate is computed from the *rounded* per-item times; if every
                # item finished in under ~5 ms the sum is 0.0, which previously
                # raised ZeroDivisionError here. Skip the progress line in
                # that degenerate case instead of crashing.
                total = sum(r["time_s"] for r in results)
                if total > 0:
                    rate = (i + 1) / total
                    eta = (len(items) - i - 1) / rate
                    print(f" [{i+1}/{len(items)}] {rate:.1f}/s, ETA {eta:.0f}s",
                          file=sys.stderr, flush=True)
        return results
def main():
    """CLI entry point: summarize a single argument, stdin, or a JSON batch.

    Modes (checked in order):
      --batch FILE  : summarize a JSON list of {id, text} objects
      positional    : summarize the given text argument
      piped stdin   : summarize whatever was piped in
      otherwise     : print help
    """
    parser = argparse.ArgumentParser(description="Generate one-sentence summaries")
    parser.add_argument("text", nargs="?", help="Text to summarize")
    parser.add_argument("--batch", help="JSON file with list of {id, text} objects")
    parser.add_argument("--output", help="Output JSON file for batch mode")
    parser.add_argument("--threads", type=int, default=8)
    parser.add_argument("--model", default=MODEL_PATH)
    args = parser.parse_args()

    # Status lines go to stderr so stdout carries only the summaries.
    print("Loading summarizer...", file=sys.stderr, flush=True)
    t0 = time.time()
    s = Summarizer(model_path=args.model, n_threads=args.threads)
    print(f"Ready ({time.time()-t0:.1f}s)", file=sys.stderr, flush=True)

    if args.batch:
        # Explicit UTF-8: the platform default (locale) encoding can differ,
        # e.g. on Windows, and would corrupt non-ASCII batch files.
        with open(args.batch, encoding="utf-8") as f:
            items = json.load(f)
        print(f"Summarizing {len(items)} items...", file=sys.stderr, flush=True)
        results = s.summarize_batch(items)
        if args.output:
            with open(args.output, "w", encoding="utf-8") as f:
                # ensure_ascii=False keeps non-ASCII summaries human-readable
                # in the output file (still valid JSON for all consumers).
                json.dump(results, f, indent=2, ensure_ascii=False)
            print(f"Results saved to {args.output}", file=sys.stderr)
        else:
            # No output file: emit one JSON object per line (JSONL) to stdout.
            for r in results:
                print(json.dumps(r))
    elif args.text:
        print(s.summarize(args.text))
    elif not sys.stdin.isatty():
        # Piped input; ignore pure-whitespace input.
        text = sys.stdin.read().strip()
        if text:
            print(s.summarize(text))
    else:
        parser.print_help()
# Run the CLI only when executed as a script, not when imported as a library
# (the module docstring documents `from summarize import Summarizer` usage).
if __name__ == "__main__":
    main()