# Source: garbage-classifier / knowledge_base.py (zhangruicong-ai @ 9361732, 6.27 kB)
"""
垃圾 DNA 知识库 - 物品生命周期溯源系统
Garbage DNA Knowledge Base - Item Lifecycle Traceability
提供每件垃圾的:材质构成、降解时间、回收去向、趣味知识和处理建议
"""
import json
import os
import logging
from typing import Optional, Dict, List
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Path of the JSON knowledge base: data/item_dna.json, resolved relative to
# the directory that contains this file.
_DATA_FILE = os.path.join(os.path.dirname(__file__), "data", "item_dna.json")
# Chinese alias -> English key mapping.
# KnowledgeBase.query() uses this table to turn a Chinese item name into the
# English key under which the record is looked up in the loaded JSON data,
# both for exact alias hits and for substring (fuzzy) alias hits.
# Insertion order matters for the fuzzy pass: the first alias that matches wins.
_ITEM_ALIASES = {
    "塑料瓶": "plastic bottle for drinking",
    "玻璃瓶": "glass bottle for drinking",
    "易拉罐": "aluminum can for soda or beer",
    "纸张纸板": "paper and cardboard for recycling",
    "纸箱": "cardboard box for packaging",
    "报纸杂志": "newspaper and magazines",
    "衣物织物": "clothing and textiles",
    "塑料袋": "plastic bag for shopping",
    "金属制品": "metal tools and hardware",
    "电子产品": "electronics and wires with circuit",
    "书本": "books and notebooks for school",
    "玻璃罐": "glass jar with lid",
    "牛奶盒": "milk carton or tetra pak drink box",
    "干净纸杯": "clean paper coffee cup",
    "铁罐": "iron and steel food cans",
    "塑料容器": "plastic container for food storage",
    "鞋包": "shoes and bags made of fabric",
    "瓶盖": "plastic bottle cap",
    "电池": "battery cell for electronics",
    "灯泡": "light bulb glass transparent",
    "药品": "medicine pills and tablets",
    "油漆桶": "paint can with metal handle",
    "化妆品": "nail polish bottle small",
    "食物残渣": "food waste and leftovers on plate",
    "水果果皮": "fruit and fruit peels organic waste",
    "茶叶咖啡渣": "tea leaves and coffee grounds wet",
    "蛋壳": "egg shell broken pieces",
    "鱼骨鸡骨": "fish bones and chicken bones",
    "花草植物": "flowers and plants wilted",
    "泡沫塑料": "styrofoam and foam packaging white",
    "口罩": "disposable face mask blue",
    "陶瓷": "ceramics and pottery plate or bowl",
    "纸巾": "tissue paper and napkins used",
    "烟蒂": "cigarette butt with filter",
    "尿不湿/卫生巾": "diaper and sanitary pads",
    "泡面桶": "instant noodle cup styrofoam cup with lid",
    "外卖盒": "takeout food container plastic box",
    "奶茶杯": "bubble tea cup with plastic lid and straw",
    "一次性筷子": "disposable wooden chopsticks pair",
    "吸管": "plastic drinking straw",
    "碎玻璃": "broken glass not recyclable shards",
    "泡沫饭盒": "disposable foam lunch box",
    "湿巾": "wet wipes in package",
    "保鲜膜": "plastic wrap and cling film",
    "碎瓷器": "broken porcelain ceramic shards",
    "木筷": "wooden chopsticks reusable",
    "牙签": "wooden toothpick small",
    "棉签": "cotton swab for ears",
    "创可贴": "bandage and adhesive tape medical",
    "纸碗": "paper bowl disposable",
    "木材家具": "wooden furniture or wood pieces",
    "杀虫剂瓶": "pesticide spray bottle",
    "温度计": "mercury thermometer glass",
    "荧光灯管": "fluorescent tube light",
    "尘土": "dust and dirt pile",
}
class KnowledgeBase:
    """Garbage "DNA" knowledge base loaded from a JSON file.

    Records are held in a dict keyed by the English item description; each
    value is the JSON object stored for that item in the data file.
    """

    def __init__(self):
        # English key -> DNA record. Stays empty until load() succeeds.
        self._dna: Dict[str, dict] = {}

    def load(self):
        """Load the DNA records from the module's JSON data file.

        This is best effort: a missing, unreadable, malformed or wrongly
        shaped file is logged and the records already held are kept as they
        are. Nothing is raised to the caller.
        """
        if not os.path.exists(_DATA_FILE):
            logger.warning("[DNA] 知识库文件不存在: %s", _DATA_FILE)
            return

        try:
            with open(_DATA_FILE, "r", encoding="utf-8") as f:
                data = json.load(f)
        except Exception as e:
            # Deliberately broad: loading must never take the caller down.
            logger.error("[DNA] 加载知识库失败: %s", e)
            return

        # query()/all_items() rely on a mapping; a JSON list or scalar at the
        # top level would otherwise be stored and fail later in confusing ways.
        if not isinstance(data, dict):
            logger.error(
                "[DNA] 加载知识库失败: 顶层 JSON 应为对象, 实际为 %s",
                type(data).__name__,
            )
            return

        self._dna = data
        logger.info("[DNA] 已加载 %d 条知识记录", len(self._dna))

    def query(
        self,
        item_name: Optional[str] = None,
        item_name_zh: Optional[str] = None,
    ) -> Optional[Dict]:
        """Look up the DNA record for an item.

        Matching strategy, first hit wins:
          1. exact match on the English key
          2. exact match on a Chinese alias
          3. fuzzy match: Chinese name contains an alias, or the reverse
          4. fuzzy match: English name contains a key, or the reverse
          5. passes 1 and 4 repeated on case-folded, whitespace-trimmed text

        Args:
            item_name: English item name, or None.
            item_name_zh: Chinese item name, or None.

        Returns:
            The matching record, or None when both names are empty or nothing
            matches.
        """
        if not item_name and not item_name_zh:
            return None

        # 1. Exact English key.
        if item_name and item_name in self._dna:
            return self._dna[item_name]

        if item_name_zh:
            # 2. Exact Chinese alias.
            key = _ITEM_ALIASES.get(item_name_zh)
            if key in self._dna:
                return self._dna[key]

            # 3. Fuzzy Chinese: substring relation in either direction.
            for alias, key in _ITEM_ALIASES.items():
                if alias in item_name_zh or item_name_zh in alias:
                    if key in self._dna:
                        return self._dna[key]

        if item_name:
            # 4. Fuzzy English: substring relation in either direction.
            for key in self._dna:
                if item_name in key or key in item_name:
                    return self._dna[key]

            # 5. Only reached when the case-sensitive passes found nothing, so
            #    every query that resolved before still resolves the same way.
            return self._match_english_normalized(item_name)

        return None

    def _match_english_normalized(self, item_name: str) -> Optional[Dict]:
        """Match an English name ignoring case and surrounding whitespace."""
        wanted = item_name.strip().casefold()
        if not wanted:
            # An empty needle is a substring of every key; never match on it.
            return None

        normalized = [(key.strip().casefold(), key) for key in self._dna]

        # Exact hits first so that "Plastic Bottle Cap" is not captured by a
        # shorter key such as "plastic bottle".
        for candidate, key in normalized:
            if candidate == wanted:
                return self._dna[key]

        for candidate, key in normalized:
            if candidate and (wanted in candidate or candidate in wanted):
                return self._dna[key]

        return None

    def all_items(self) -> List[str]:
        """Return the English keys of every loaded record."""
        return list(self._dna.keys())

    @property
    def count(self) -> int:
        """Number of loaded records."""
        return len(self._dna)
# Global singleton slot: None until get_knowledge_base() creates and loads
# the shared KnowledgeBase instance on first use.
_kb: Optional[KnowledgeBase] = None
def get_knowledge_base() -> KnowledgeBase:
    """Return the shared knowledge base, creating and loading it on first use."""
    global _kb
    if _kb is not None:
        return _kb

    # First call: publish the instance, then populate it from the data file.
    _kb = KnowledgeBase()
    _kb.load()
    return _kb
def query_dna(item_name: str = None, item_name_zh: str = None) -> Optional[Dict]:
    """Convenience wrapper: query the shared knowledge base for an item."""
    knowledge_base = get_knowledge_base()
    return knowledge_base.query(item_name=item_name, item_name_zh=item_name_zh)
if __name__ == "__main__":
    # Manual smoke test: load the knowledge base and print a few lookups.
    logging.basicConfig(level=logging.INFO)
    knowledge = get_knowledge_base()
    print(f"知识库已加载 {knowledge.count} 条记录")

    # (English name, Chinese name) pairs; the last one is expected to miss.
    samples = (
        ("plastic bottle for drinking", "塑料瓶"),
        ("aluminum can for soda or beer", "易拉罐"),
        ("battery cell for electronics", "电池"),
        (None, "未知物品"),
    )

    for name_en, name_zh in samples:
        record = knowledge.query(name_en, name_zh)
        if not record:
            print(f" [{name_zh}] 未找到 DNA 信息")
            continue
        print(f" [{name_zh}] 材质: {record['material'][:30]}...")
        print(f" 降解: {record['decomposition_desc'][:40]}...")
        print(f" 趣味: {record['fun_facts'][0]}")