Flamehaven's picture
feat: Implement core CRoM modules (packer, encoder, logger, server)
2844db6
from typing import List, Dict
import logging
def enhanced_greedy_pack(chunks: List[Dict], budget: int,
                         score_key: str = "score") -> tuple[List[Dict], Dict]:
    """Greedily pack chunks into a token budget and report packing statistics.

    Chunks are ranked by score-per-token (highest first) and added in that
    order whenever they still fit into the remaining budget; a chunk that
    does not fit is skipped and later (smaller) chunks are still considered.

    Args:
        chunks: Candidate chunk dicts. A chunk without a ``"token_count"``
            key gets one written onto it in place, estimated as
            ``max(1, len(text) // 4)`` from its ``"text"`` value (a missing
            or ``None`` text counts as empty).
        budget: Maximum total ``token_count`` of the packed chunks.
        score_key: Key holding each chunk's score; a missing score is
            treated as 0.

    Returns:
        tuple: ``(packed_chunks, stats_dict)`` where ``stats_dict`` has the
        keys ``selected_count``, ``packed_count``, ``selected_tokens``,
        ``packed_tokens``, ``compression_ratio`` (packed count / input
        count), ``token_savings`` (input tokens - packed tokens) and
        ``efficiency`` (packed tokens / budget, 0.0 for a non-positive
        budget).
    """
    if not chunks:
        return [], {
            "selected_count": 0,
            "packed_count": 0,
            "selected_tokens": 0,
            "packed_tokens": 0,
            "compression_ratio": 0.0,
            "token_savings": 0,
            "efficiency": 0.0,
        }

    # Pre-compute token counts. The estimate is stored on the chunk itself so
    # the returned chunks carry it, exactly as before.
    for chunk in chunks:
        if "token_count" not in chunk:
            # `or ""` guards against an explicit `"text": None`.
            text = chunk.get("text") or ""
            chunk["token_count"] = max(1, len(text) // 4)

    def _density(chunk: Dict) -> float:
        """Return score per token, without dividing by a zero token count."""
        tokens = chunk["token_count"]
        score = chunk.get(score_key, 0)
        if tokens > 0:
            return score / tokens
        # A chunk with a non-positive token count costs no budget: rank it by
        # the sign of its score instead of raising ZeroDivisionError.
        if score > 0:
            return float("inf")
        if score < 0:
            return float("-inf")
        return 0.0

    # Rank by efficiency (score/token ratio), best first. sorted() is stable,
    # so chunks with equal ratios keep their input order.
    ranked = sorted(chunks, key=_density, reverse=True)

    # Greedy packing: take every chunk that still fits.
    packed_chunks = []
    used_tokens = 0
    for chunk in ranked:
        if used_tokens + chunk["token_count"] <= budget:
            packed_chunks.append(chunk)
            used_tokens += chunk["token_count"]

    # Detailed statistics. `chunks` is known to be non-empty here, so the
    # count ratio needs no extra guard.
    total_selected_tokens = sum(chunk["token_count"] for chunk in chunks)
    stats = {
        "selected_count": len(chunks),
        "packed_count": len(packed_chunks),
        "selected_tokens": total_selected_tokens,
        "packed_tokens": used_tokens,
        "compression_ratio": len(packed_chunks) / len(chunks),
        "token_savings": total_selected_tokens - used_tokens,
        "efficiency": used_tokens / budget if budget > 0 else 0.0,
    }

    # Log a one-line summary; lazy %-style arguments defer the message
    # interpolation until a handler actually emits the record.
    logging.info(
        "Packing completed: %s/%s chunks, tokens: %s/%s (efficiency: %s)",
        stats["packed_count"],
        stats["selected_count"],
        stats["packed_tokens"],
        stats["selected_tokens"],
        f"{stats['efficiency']:.1%}",
    )
    return packed_chunks, stats