| from typing import List, Dict |
| import logging |
|
|
def enhanced_greedy_pack(chunks: List[Dict], budget: int,
                         score_key: str = "score") -> tuple[List[Dict], Dict]:
    """Greedily pack chunks into a token budget and report packing statistics.

    Chunks are ranked by score density (score per token, highest first) and
    added in that order whenever they still fit in the remaining budget. A
    chunk that does not fit is skipped, and later (smaller) chunks may still
    be packed.

    Side effect: any chunk without a ``"token_count"`` key gets one written
    into it, estimated as ``len(text) // 4`` with a minimum of 1.

    Args:
        chunks: Chunk dicts. Each may carry ``"text"``, ``"token_count"`` and
            a score stored under ``score_key``.
        budget: Maximum total ``token_count`` of the packed chunks.
        score_key: Key holding each chunk's score. A missing or ``None``
            score counts as 0.

    Returns:
        tuple: ``(packed_chunks, stats_dict)``. ``packed_chunks`` holds the
        selected chunk dicts (the same objects as in ``chunks``) in packing
        order. ``stats_dict`` has the keys ``selected_count``,
        ``packed_count``, ``selected_tokens``, ``packed_tokens``,
        ``compression_ratio`` (packed count / input count), ``token_savings``
        (input tokens - packed tokens) and ``efficiency`` (packed tokens /
        budget, or 0.0 when the budget is not positive).
    """
    if not chunks:
        return [], {
            "selected_count": 0,
            "packed_count": 0,
            "selected_tokens": 0,
            "packed_tokens": 0,
            "compression_ratio": 0.0,
            "token_savings": 0,
            "efficiency": 0.0,
        }

    # Fill in a rough token estimate (~4 characters per token) where missing.
    for chunk in chunks:
        if "token_count" not in chunk:
            # `or ""` also covers a "text" key explicitly set to None.
            text = chunk.get("text") or ""
            chunk["token_count"] = max(1, len(text) // 4)

    def _density(chunk: Dict) -> float:
        # A score of None is treated like a missing score (0).
        score = chunk.get(score_key) or 0
        # Clamp the divisor so a caller-supplied token_count of 0 cannot
        # raise ZeroDivisionError; such a chunk is ranked by its raw score.
        return score / max(chunk["token_count"], 1)

    # sorted() is stable, so chunks with equal density keep their input order.
    ranked_chunks = sorted(chunks, key=_density, reverse=True)

    packed_chunks = []
    used_tokens = 0
    for chunk in ranked_chunks:
        if used_tokens + chunk["token_count"] <= budget:
            packed_chunks.append(chunk)
            used_tokens += chunk["token_count"]

    total_selected_tokens = sum(chunk["token_count"] for chunk in chunks)

    stats = {
        "selected_count": len(chunks),
        "packed_count": len(packed_chunks),
        "selected_tokens": total_selected_tokens,
        "packed_tokens": used_tokens,
        # `chunks` is non-empty here (empty input returned early above).
        "compression_ratio": len(packed_chunks) / len(chunks),
        "token_savings": total_selected_tokens - used_tokens,
        "efficiency": used_tokens / budget if budget > 0 else 0.0,
    }

    logging.info(f"Packing completed: {stats['packed_count']}/{stats['selected_count']} chunks, "
                 f"tokens: {stats['packed_tokens']}/{stats['selected_tokens']} "
                 f"(efficiency: {stats['efficiency']:.1%})")

    return packed_chunks, stats
|
|