# apply_source_from_webtxt.py — Chart/dataset/label/Vchart/box (commit 9fce90e, "clean init")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import json
import re
import sys
from pathlib import Path
# Strict filename pattern: chart_<digits>_bar.json (case-insensitive).
FILENAME_RE = re.compile(r"^chart_(\d+)_bar\.json$", re.IGNORECASE)


def extract_num(p: Path) -> int:
    """Return the integer embedded in a ``chart_<NNNN>_bar.json`` filename.

    Used as a sort key so files order numerically (2 before 10), not
    lexicographically.  Raises ValueError for non-conforming names.
    """
    if (m := FILENAME_RE.match(p.name)) is None:
        raise ValueError(f"文件名不符合规则: {p.name}")
    return int(m.group(1))
def read_lines_no_empty(txt_path: Path) -> list[str]:
    """Read *txt_path* as UTF-8 text and return its non-empty lines.

    Decodes with ``utf-8-sig`` so a leading BOM is stripped rather than
    leaking into the first line as ``\\ufeff`` (for BOM-less files it decodes
    exactly like plain ``utf-8``).  If the bytes are not valid UTF-8 at all,
    retries with ``errors="replace"`` so a bad byte never aborts the run.

    Returns lines without trailing newline characters; blank/whitespace-only
    lines are filtered out defensively.
    """
    try:
        raw = txt_path.read_text(encoding="utf-8-sig")
    except UnicodeDecodeError:
        raw = txt_path.read_text(encoding="utf-8-sig", errors="replace")
    # splitlines() drops the line terminators; filter blank lines.
    return [line for line in raw.splitlines() if line.strip() != ""]
def main() -> int:
    """CLI entry point: write each web.txt line into its JSON's "Weblink".

    Files named ``chart_<NNNN>_bar.json`` in the target folder are sorted by
    their numeric middle part and paired 1:1 with the non-empty lines of
    web.txt; line N becomes the ``Weblink`` value of file N.

    Exit codes: 0 = success, 1 = no matching files or line/file count
    mismatch, 2 = bad directory or missing web.txt.
    """
    ap = argparse.ArgumentParser(
        description="按文件名中间数字正序,将 web.txt 每行写入对应 JSON 的 Weblink 字段"
    )
    ap.add_argument("dir", help="包含 json 与 web.txt 的文件夹路径")
    ap.add_argument("--txt", default="web.txt", help="文本文件名/路径(默认 web.txt)")
    ap.add_argument("--dry-run", action="store_true", help="只预览不写入")
    ap.add_argument("--backup", action="store_true", help="写入前备份为 .bak(仅第一次)")
    args = ap.parse_args()

    folder = Path(args.dir)
    if not folder.exists() or not folder.is_dir():
        print(f"目录不存在或不是文件夹:{folder}", file=sys.stderr)
        return 2

    # A relative --txt path resolves inside the target folder.
    txt_path = Path(args.txt)
    if not txt_path.is_absolute():
        txt_path = folder / txt_path
    if not txt_path.exists():
        print(f"找不到 web.txt:{txt_path}", file=sys.stderr)
        return 2

    # Collect JSONs strictly matching chart_<digits>_bar.json; the regex
    # re-checks each glob hit (glob alone would accept e.g. chart_x_bar.json).
    candidates = list(folder.glob("chart_*_bar.json"))
    json_files = []
    bad_names = []
    for p in candidates:
        if FILENAME_RE.match(p.name):
            json_files.append(p)
        else:
            bad_names.append(p.name)

    if not json_files:
        print("未找到符合 chart_0001_bar.json 规则的文件。", file=sys.stderr)
        if candidates:
            print("但找到一些类似文件:", file=sys.stderr)
            for n in sorted([c.name for c in candidates])[:20]:
                print(f" - {n}", file=sys.stderr)
        return 1

    # Sort by the numeric middle part (so 2 comes before 10).
    json_files.sort(key=extract_num)

    # Read web.txt (blank lines filtered; no trailing newlines).
    lines = read_lines_no_empty(txt_path)

    # Refuse to guess at pairings when counts disagree.
    if len(lines) != len(json_files):
        print("行数与 JSON 数量不一致,停止执行:", file=sys.stderr)
        print(f" web.txt 有效行数(已过滤空行): {len(lines)}", file=sys.stderr)
        print(f" json 文件数: {len(json_files)}", file=sys.stderr)
        print("\n前几个 json 文件(排序后):", file=sys.stderr)
        for p in json_files[:10]:
            print(f" - {p.name}", file=sys.stderr)
        return 1

    changed = 0
    for i, (p, src_value) in enumerate(zip(json_files, lines), start=1):
        try:
            original_text = p.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            original_text = p.read_text(encoding="utf-8-sig", errors="replace")
        try:
            data = json.loads(original_text)
        except Exception as e:
            # Per-file diagnostics go to stderr, consistent with the other
            # error paths above.
            print(f"错误(JSON 解析失败):{p.name} -> {e}", file=sys.stderr)
            continue
        if not isinstance(data, dict):
            print(f"跳过(JSON 顶层不是对象):{p.name}", file=sys.stderr)
            continue

        old = data.get("Weblink", None)
        data["Weblink"] = src_value
        will_change = (old != src_value)

        if args.dry_run:
            print(f"[DRY {i:04d}] {p.name}: Weblink {old!r} -> {src_value!r}")
            continue

        # Back up the original text once (never overwrite an existing .bak),
        # and only when this write would actually change the file.
        if will_change and args.backup:
            bak = p.with_suffix(p.suffix + ".bak")
            if not bak.exists():
                bak.write_text(original_text, encoding="utf-8")

        if will_change:
            p.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
            changed += 1
            print(f"[OK {i:04d}] {p.name}: Weblink 写入成功")
        else:
            print(f"[NO {i:04d}] {p.name}: Weblink 无变化(已相同)")

    print("\n==== 完成 ====")
    print(f"总文件数: {len(json_files)}")
    print(f"发生写入: {changed}")
    if bad_names:
        print("\n提示:同目录下还有这些文件名不符合严格规则(未处理):")
        for n in sorted(bad_names)[:50]:
            print(f" - {n}")
    return 0
if __name__ == "__main__":
    # Propagate main()'s integer return value as the process exit code.
    raise SystemExit(main())