| import os |
| import shutil |
| import subprocess |
| import json |
| from src.devcore.syntax_ingester import SyntaxIngester |
|
|
class DeveloperDataPipeline:
    """Clone a git repository, run the syntax ingester on it, and save the result.

    The result of ``SyntaxIngester.process_directory`` is written as JSON to
    ``<target_dir>/<project_name>_ast_profile.json``; the cloned checkout is
    deleted afterwards.
    """

    def __init__(self, target_dir="src/devcore/training_data"):
        """Create the pipeline and make sure ``target_dir`` exists.

        Args:
            target_dir: Directory that receives temporary clones and the
                generated JSON profiles. Created if missing.
        """
        self.target_dir = target_dir
        self.ingester = SyntaxIngester()
        os.makedirs(self.target_dir, exist_ok=True)

    def fetch_and_ingest(self, repo_url, project_name):
        """Shallow-clone ``repo_url``, ingest it, and purge the raw checkout.

        Args:
            repo_url: Repository location handed to ``git clone``.
            project_name: Name of the temporary clone directory inside
                ``target_dir`` and prefix of the output JSON file.

        Returns:
            None. Progress and failures are reported on stdout. If the clone
            fails (non-zero git exit status, or no ``git`` executable), a
            message is printed and the method returns without writing a
            profile.

        Raises:
            Whatever the ingester, ``open``, ``json.dump`` or
            ``shutil.rmtree`` raise; the clone directory is still purged
            before an ingestion/serialization error propagates.
        """
        banner = "[=================================================]"
        print(f"\n{banner}")
        print(f"[+] Initializing target acquisition: {project_name}")
        print(banner)

        clone_path = os.path.join(self.target_dir, project_name)

        print(f"[*] Cloning source data from {repo_url}...")
        try:
            subprocess.run(
                # "--" ends option parsing so a repo_url that starts with "-"
                # can never be interpreted as a git command-line option.
                ["git", "clone", "--depth", "1", "--", repo_url, clone_path],
                check=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except subprocess.CalledProcessError:
            print("[-] Target acquisition failed. Check network or URL.")
            return
        except FileNotFoundError:
            # Raised by subprocess when the "git" executable cannot be found.
            print("[-] Target acquisition failed. 'git' executable not found.")
            return

        try:
            print("[*] Engaging Syntax Ingester to extract logic structures...")
            # NOTE(review): the result is assumed to be JSON-serializable and
            # to support len(); confirm against SyntaxIngester.
            structural_profiles = self.ingester.process_directory(clone_path)

            output_file = os.path.join(
                self.target_dir, f"{project_name}_ast_profile.json"
            )
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(structural_profiles, f, indent=2)

            print(f"[+] Structural logic mapped successfully. Profile contains {len(structural_profiles)} modules.")
            print(f"[+] Hebbian training payload saved to: {output_file}")
        finally:
            # Always remove the checkout, even when ingestion or serialization
            # fails; a leftover directory would waste disk space and make the
            # next clone into the same path fail.
            print("[*] Purging raw repository files to conserve system partition space...")
            shutil.rmtree(clone_path)

        print("[+] Operation complete. System storage secured.")
|
|
if __name__ == "__main__":
    # Script entry point: run the pipeline once against the Flask repository,
    # using the default training-data directory.
    DeveloperDataPipeline().fetch_and_ingest(
        "https://github.com/pallets/flask.git",
        "flask_core_logic",
    )
|
|