Spaces:

build-small-hackathon
/

tiny-press

Running on Zero

App Files Files Community

tiny-press / core /compressor.py

sriharsha-cr

NoGPU 0.2

c4cbe0b about 23 hours ago

raw

history blame contribute delete

1.83 kB

	import torch
	import config
	from core.tokenizer_utils import count_tokens
	from models.model_loader import get_llm

	# Resolve the decorator applied to GPU-bound functions in this module.
	# When the optional `spaces` package is importable its `GPU` decorator is used;
	# otherwise an identity decorator stands in.
	try:
	    import spaces
	except ImportError:
	    def _gpu(fn):
	        """Return *fn* unchanged (no-op when running locally without the spaces package)."""
	        return fn
	else:
	    _gpu = spaces.GPU


	# Instruction prompt sent to the LLM. It has two str.format placeholders:
	# {target} (the token budget) and {text} (the input to compress). The prompt
	# ends with "COMPRESSED:" so the model's continuation is the compressed text.
	_PROMPT_TEMPLATE = """You are a lossless compression assistant. Compress the following text to at most {target} tokens.
	Preserve all key facts, decisions, and intent. Do not add commentary. Output only the compressed text.

	TEXT:
	{text}

	COMPRESSED:"""


	@_gpu
	def _generate(prompt: str) -> str:
	    """Generate a continuation of *prompt* and return only the new text.

	    The model and tokenizer come from ``get_llm()``. The model is moved to
	    CUDA when available, otherwise CPU. Generation runs with sampling
	    disabled and is capped at ``config.MAX_NEW_TOKENS`` new tokens.

	    Args:
	        prompt: Full prompt text to feed to the model.

	    Returns:
	        The decoded continuation with special tokens removed and surrounding
	        whitespace stripped; the prompt itself is not included.
	    """
	    llm, tok = get_llm()
	    target_device = "cuda" if torch.cuda.is_available() else "cpu"
	    llm.to(target_device)

	    encoded = tok(prompt, return_tensors="pt").to(target_device)
	    # Remember how many tokens the prompt occupies so they can be sliced off the output.
	    prompt_len = encoded["input_ids"].shape[1]

	    generation_kwargs = dict(
	        max_new_tokens=config.MAX_NEW_TOKENS,
	        do_sample=False,
	        # Use EOS as the padding id for generation.
	        pad_token_id=tok.eos_token_id,
	    )
	    with torch.no_grad():
	        sequences = llm.generate(**encoded, **generation_kwargs)

	    # The returned sequence starts with the prompt tokens; keep only what follows.
	    continuation = sequences[0][prompt_len:]
	    decoded = tok.decode(continuation, skip_special_tokens=True)
	    return decoded.strip()


	def compress(text: str, target_tokens: int) -> tuple[str, int, int]:
	    """Compress *text* so that it fits within *target_tokens* tokens.

	    If the input already fits the budget it is returned unchanged and no
	    generation is performed. Otherwise the LLM is prompted to compress the
	    text, and the result is hard-trimmed to the budget if the model overshoots.

	    Args:
	        text: The text to compress.
	        target_tokens: Maximum number of tokens allowed in the result.
	            Must be non-negative.

	    Returns:
	        A tuple ``(compressed_text, input_token_count, output_token_count)``.

	    Raises:
	        ValueError: If *target_tokens* is negative.
	    """
	    # A negative budget would make the trimming slice below (ids[:target_tokens])
	    # drop tokens from the end instead of enforcing a limit, so reject it up front.
	    if target_tokens < 0:
	        raise ValueError(f"target_tokens must be non-negative, got {target_tokens}")

	    input_tokens = count_tokens(text)

	    # Already within budget: nothing to do, skip the model call.
	    if input_tokens <= target_tokens:
	        return text, input_tokens, input_tokens

	    prompt = _PROMPT_TEMPLATE.format(target=target_tokens, text=text)
	    compressed = _generate(prompt)

	    # Trim to hard token limit if model overshoots
	    _, tokenizer = get_llm()
	    ids = tokenizer.encode(compressed, add_special_tokens=False)
	    if len(ids) > target_tokens:
	        compressed = tokenizer.decode(ids[:target_tokens], skip_special_tokens=True)

	    # NOTE(review): the reported count comes from count_tokens, while trimming uses
	    # the get_llm() tokenizer -- confirm both count tokens the same way.
	    output_tokens = count_tokens(compressed)
	    return compressed, input_tokens, output_tokens