| | """Example: Dependency Bloat & Overlap Scanner |
| | |
| | Demonstrates the branching / map-reduce pattern in Acorn. |
| | |
| | The DependencyScanner orchestrator fans out one PackageAnalyzerBranch per |
| | dependency (map phase), collects the PackageProfile results, groups packages |
| | by overlapping purpose, and returns a PruningPlan (reduce phase). |
| | |
| | Supports both requirements.txt (pip) and package.json (npm) formats. |
| | """ |
| |
|
| | import re |
| |
|
| | import requests |
| | from pydantic import BaseModel, Field |
| |
|
| | from acorn import Module, tool |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
class PackageInput(BaseModel):
    # Input handed to one PackageAnalyzerBranch: a single dependency plus the
    # registry it should be looked up in.

    # Raw dependency name, copied from the dependency file.
    package_name: str = Field(
        description="Package name as it appears in the dependency file",
    )

    # Registry selector for the lookup.
    ecosystem: str = Field(
        description="'pip' for PyPI packages, 'npm' for Node packages",
    )
| |
|
class PackageProfile(BaseModel):
    # Result produced by one PackageAnalyzerBranch for a single dependency.

    name: str = Field(
        description="Package name",
    )
    primary_purpose: str = Field(
        description="One-line description of what the package does",
    )
    categories: list[str] = Field(
        description="1-3 functional tags, e.g. ['http', 'networking']",
    )
    key_features: list[str] = Field(
        description="3-5 bullet points describing key features",
    )
| |
|
| |
|
class RedundancyGroup(BaseModel):
    # One cluster of packages that serve the same purpose, together with the
    # keep/remove recommendation for that cluster.

    purpose: str = Field(
        description="Shared purpose, e.g. 'HTTP client'",
    )
    packages: list[str] = Field(
        description="the packages in this group",
    )
    keep: str = Field(
        description="Recommended package to keep",
    )
    remove: list[str] = Field(
        description="Recommended packages to remove",
    )
    reason: str = Field(
        description="Why the recommended package wins",
    )
| |
|
| |
|
class DependencyFileInput(BaseModel):
    # Input to the DependencyScanner orchestrator: the raw dependency file text.
    file_content: str = Field(description="Contents of a requirements.txt or package.json file")
| |
|
class PruningPlan(BaseModel):
    # Final output of the DependencyScanner orchestrator (the reduce phase).

    summary: str = Field(
        description="Narrative paragraph summarising findings and recommendations",
    )
    redundancy_groups: list[RedundancyGroup] = Field(
        description="Groups of overlapping packages",
    )
    packages_to_remove: list[str] = Field(
        description="Flat list of packages recommended for removal",
    )
| |
|
| |
|
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
@tool
def fetch_package_info(package_name: str, ecosystem: str) -> dict:
    """Fetch README/description for a package from PyPI or the npm registry.

    Args:
        package_name: Package name as it appears in the dependency file.
            Version specifiers, pip extras ("pkg[extra]") and environment
            markers ("pkg; python_version < '3.9'") are stripped; the leading
            "@" of an npm scoped package ("@scope/name") is preserved.
        ecosystem: "pip" for PyPI packages, "npm" for Node packages

    Returns:
        Dict with 'name', 'description', 'readme' (trimmed to 3 000 chars)
    """
    # Function-scope import so the file's top-level import block is untouched.
    from urllib.parse import quote

    is_pip = ecosystem == "pip"
    raw_name = package_name.strip()

    # npm scoped packages start with "@". Splitting on "@" from the very first
    # character would yield an empty name, so peel the scope marker off first
    # and only treat a *later* "@" as the start of a version ("@scope/pkg@1.2").
    scope = "@" if raw_name.startswith("@") and not is_pip else ""
    # Cut at the first version operator, extras bracket, environment-marker
    # separator or whitespace.
    clean_name = scope + re.split(r"[=<>!@~^;\[\s]", raw_name[len(scope):])[0]

    if is_pip:
        registry = "PyPI"
        url = f"https://pypi.org/pypi/{quote(clean_name, safe='')}/json"
    else:
        # Any ecosystem other than "pip" is looked up on npm. The "/" of a
        # scoped name is percent-encoded so it stays a single path segment.
        registry = "npm"
        url = f"https://registry.npmjs.org/{quote(clean_name, safe='@')}"

    not_found = {
        "name": clean_name,
        "description": f"Not found on {registry}.",
        "readme": "",
    }

    # Nothing left after cleaning (e.g. "==1.0"): skip the request instead of
    # hitting the registry root, which is not a package document.
    if not clean_name.strip("@"):
        return not_found

    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
    except requests.HTTPError:
        # Only HTTP error statuses are mapped to "not found"; connection
        # errors and timeouts still propagate to the caller.
        return not_found

    data = resp.json()
    if is_pip:
        # PyPI nests metadata under "info": "summary" is the short
        # description and "description" holds the long README text.
        info = data.get("info") or {}
        description = info.get("summary", "") or ""
        readme = info.get("description", "") or ""
    else:
        description = data.get("description", "") or ""
        readme = data.get("readme", "") or ""

    return {
        "name": clean_name,
        "description": description,
        "readme": readme[:3000],
    }
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
# System prompt for PackageAnalyzerBranch: one registry lookup, then finish.
BRANCH_PROMPT = """Analyze a single package and determine its primary purpose.

1. Call fetch_package_info with the package_name and ecosystem.
2. From the description and README, identify: primary_purpose (one clear
   sentence), categories (1-3 functional tags), and key_features
   (3-5 bullet points).
3. Finish immediately after fetching — do not loop.
"""
| |
|
| |
|
class PackageAnalyzerBranch(Module):
    """Analyze a single package and extract its purpose and features."""

    # Typed contract: one PackageInput in, one PackageProfile out.
    initial_input = PackageInput
    final_output = PackageProfile

    # The only tool available to this branch is the registry lookup.
    tools = [fetch_package_info]

    # Prompt and model settings.
    system_prompt = BRANCH_PROMPT
    model = "anthropic/claude-sonnet-4-6"
    temperature = 0.2
    max_steps = 5
| |
|
| |
|
| | |
| | |
| | |
| |
|
# System prompt for the DependencyScanner orchestrator (map, then reduce).
SYSTEM_PROMPT = """You are a dependency analyst specializing in finding redundant libraries.

Workflow:
1. Parse the file_content to extract all top-level dependency names.
   Detect the ecosystem: "npm" if it parses as JSON (package.json),
   "pip" if it looks like requirements.txt.

2. For EACH dependency use the PackageAnalyzerBranch branch to analyze each. You can analyze multiple in parallel.

3. After all branches finish, review the collected PackageProfile results.
   Group packages by overlapping primary_purpose / categories.

4. Identify RedundancyGroups — packages that do the same job.
   Recommend keeping the most popular/maintained one and removing the rest.

5. Call __finish__ with the complete PruningPlan.
"""
| |
|
| |
|
class DependencyScanner(Module):
    """Scan a requirements.txt or package.json for redundant dependencies."""

    # Typed contract: raw dependency file text in, PruningPlan out.
    initial_input = DependencyFileInput
    final_output = PruningPlan

    # Map phase: the per-dependency branch this orchestrator can spawn.
    branches = [PackageAnalyzerBranch]

    # Prompt and model settings.
    system_prompt = SYSTEM_PROMPT
    model = "anthropic/claude-haiku-4-5"
    temperature = 0.2
    max_steps = 60
| |
|