# Provenance (Hugging Face upload metadata, kept as comments so the file parses):
# ericflo's picture
# Upload training/summarize.py with huggingface_hub
# 4915429 verified
#!/usr/bin/env python3
"""
Generate one-sentence summaries using the distilled Qwen3-0.6B model.
Uses llama-cpp-python for CPU inference. ~3-5s per summary.
Usage:
# Single text
python3 summarize.py "Eric wants to add dark mode to the webui-next app."
# From stdin
echo "Some text to summarize" | python3 summarize.py
# Batch from JSON (list of {"id": ..., "text": ...})
python3 summarize.py --batch input.json --output results.json
# As a library
from summarize import Summarizer
s = Summarizer()
print(s.summarize("Some text here"))
"""
import argparse
import json
import os
import sys
import time
from pathlib import Path
# Directory containing this script; the model file is expected under ./models beside it.
PROJECT_DIR = Path(__file__).parent
# Default checkpoint: distilled Qwen3-0.6B summarizer in 8-bit-quantized GGUF form.
MODEL_PATH = str(PROJECT_DIR / "models" / "qwen3-0.6b-summarizer-q8_0.gguf")
# Completion-style prompt; {text} is replaced with the (possibly truncated) input.
PROMPT_TEMPLATE = "Summarize in one sentence:\n{text}\n\nSummary:"
class Summarizer:
    """CPU-based one-sentence summarizer using distilled Qwen3-0.6B.

    Wraps a llama-cpp-python model loaded from a local GGUF file and runs
    completion-style inference on the CPU (~3-5 s per summary, per the
    module docs).
    """

    def __init__(self, model_path=MODEL_PATH, n_threads=8, n_ctx=512):
        """Load the GGUF model.

        Args:
            model_path: Path to the GGUF model file.
            n_threads: Number of CPU threads for inference.
            n_ctx: Context window size in tokens (shared by prompt + output).
        """
        # Imported lazily so this module can be imported (e.g. by tooling or
        # for --help) without llama-cpp-python installed.
        from llama_cpp import Llama
        self.llm = Llama(model_path, n_ctx=n_ctx, n_threads=n_threads, verbose=False)

    def summarize(self, text, max_tokens=80, temperature=0.3, max_input_chars=2000):
        """Generate a one-sentence summary of the input text.

        Args:
            text: Raw text to summarize (markdown, chat messages, task
                descriptions, etc.)
            max_tokens: Maximum output tokens (summaries are typically
                20-40 tokens)
            temperature: Sampling temperature (0.3 = mostly deterministic)
            max_input_chars: Truncate input beyond this length so the prompt
                fits inside the model's context window

        Returns:
            One-sentence summary string
        """
        prompt = PROMPT_TEMPLATE.format(text=text[:max_input_chars])
        # Stop at the first newline or chat end-of-turn marker so the model
        # emits exactly one line.
        out = self.llm(prompt, max_tokens=max_tokens, temperature=temperature,
                       stop=["\n", "<|im_end|>", "<|endoftext|>"])
        return out["choices"][0]["text"].strip()

    def summarize_batch(self, items, progress=True):
        """Summarize a list of {"id": ..., "text": ...} dicts.

        Args:
            items: Sequence of dicts with a required "text" key and an
                optional "id" key (defaults to the item's index).
            progress: When True, print a rate/ETA line to stderr every
                10 items.

        Returns:
            List of {"id": ..., "text": ..., "summary": ..., "time_s": ...}
            where "text" is truncated to 200 chars and "time_s" is per-item
            wall-clock seconds rounded to 2 decimals.
        """
        results = []
        for i, item in enumerate(items):
            t0 = time.time()
            summary = self.summarize(item["text"])
            elapsed = time.time() - t0
            results.append({
                "id": item.get("id", i),
                "text": item["text"][:200],
                "summary": summary,
                "time_s": round(elapsed, 2),
            })
            if progress and (i + 1) % 10 == 0:
                # Rate is computed from the *rounded* per-item times; if every
                # item finished in under ~5 ms the sum is 0.0, which previously
                # raised ZeroDivisionError here. Skip the progress line in
                # that degenerate case instead of crashing.
                total = sum(r["time_s"] for r in results)
                if total > 0:
                    rate = (i + 1) / total
                    eta = (len(items) - i - 1) / rate
                    print(f" [{i+1}/{len(items)}] {rate:.1f}/s, ETA {eta:.0f}s",
                          file=sys.stderr, flush=True)
        return results
def main():
    """CLI entry point: summarize a single argument, stdin, or a JSON batch.

    Modes (checked in order):
      --batch FILE  : summarize a JSON list of {id, text} objects
      positional    : summarize the given text argument
      piped stdin   : summarize whatever was piped in
      otherwise     : print help
    """
    parser = argparse.ArgumentParser(description="Generate one-sentence summaries")
    parser.add_argument("text", nargs="?", help="Text to summarize")
    parser.add_argument("--batch", help="JSON file with list of {id, text} objects")
    parser.add_argument("--output", help="Output JSON file for batch mode")
    parser.add_argument("--threads", type=int, default=8)
    parser.add_argument("--model", default=MODEL_PATH)
    args = parser.parse_args()

    # Status lines go to stderr so stdout carries only the summaries.
    print("Loading summarizer...", file=sys.stderr, flush=True)
    t0 = time.time()
    s = Summarizer(model_path=args.model, n_threads=args.threads)
    print(f"Ready ({time.time()-t0:.1f}s)", file=sys.stderr, flush=True)

    if args.batch:
        # Explicit UTF-8: the platform default (locale) encoding can differ,
        # e.g. on Windows, and would corrupt non-ASCII batch files.
        with open(args.batch, encoding="utf-8") as f:
            items = json.load(f)
        print(f"Summarizing {len(items)} items...", file=sys.stderr, flush=True)
        results = s.summarize_batch(items)
        if args.output:
            with open(args.output, "w", encoding="utf-8") as f:
                # ensure_ascii=False keeps non-ASCII summaries human-readable
                # in the output file (still valid JSON for all consumers).
                json.dump(results, f, indent=2, ensure_ascii=False)
            print(f"Results saved to {args.output}", file=sys.stderr)
        else:
            # No output file: emit one JSON object per line (JSONL) to stdout.
            for r in results:
                print(json.dumps(r))
    elif args.text:
        print(s.summarize(args.text))
    elif not sys.stdin.isatty():
        # Piped input; ignore pure-whitespace input.
        text = sys.stdin.read().strip()
        if text:
            print(s.summarize(text))
    else:
        parser.print_help()
# Run the CLI only when executed as a script, not when imported as a library
# (the module docstring documents `from summarize import Summarizer` usage).
if __name__ == "__main__":
    main()