| from pathlib import Path | |
| import json | |
def get_md_contents(src_dir, target_dir="/home/weifengsun/tangou1/step2/step22/dataset") -> None:
    """Export each sub-directory's README summary as a JSON file.

    For every immediate sub-directory of ``src_dir`` that contains a regular
    file named ``README_SUMMARY.md``, the file's text is read as UTF-8 and
    written to ``<target_dir>/<sub-directory name>/readme_summary.json`` as
    ``{"readme_summary": <text>}``. Sub-directories without such a file, and
    non-directory entries of ``src_dir``, are skipped without creating any
    output.

    Args:
        src_dir: Directory whose immediate sub-directories are scanned.
        target_dir: Root directory for the generated JSON files; missing
            directories below it are created as needed.

    Raises:
        OSError: If ``src_dir`` cannot be listed (for example, it does not
            exist) or an output file cannot be written.
        UnicodeDecodeError: If a summary file is not valid UTF-8.
    """
    src_path = Path(src_dir)
    target_path = Path(target_dir)

    for subdir in src_path.iterdir():
        if not subdir.is_dir():
            continue

        readme_file = subdir / "README_SUMMARY.md"
        # is_file() rather than exists(): a *directory* that happens to be
        # named README_SUMMARY.md must be skipped, not passed to read_text().
        if not readme_file.is_file():
            continue

        content = readme_file.read_text(encoding='utf-8')

        # Create the output directory only once there is something to write.
        dest_dir = target_path / subdir.name
        dest_dir.mkdir(parents=True, exist_ok=True)
        with open(dest_dir / "readme_summary.json", "w", encoding='utf-8') as f:
            # ensure_ascii=False keeps non-ASCII text readable in the output.
            json.dump({"readme_summary": content}, f, ensure_ascii=False, indent=4)
if __name__ == "__main__":
    # Script entry point: scan the filtered repositories directory. No target
    # directory is passed, so output goes to the function's default,
    # /home/weifengsun/tangou1/step2/step22/dataset
    repos_root = "/home/weifengsun/tangou1/domain_code/src/workdir/repos_filtered"
    get_md_contents(repos_root)