# Spaces: askmanu / acorn · Running · File size: 6,121 Bytes · ad6d452

"""Example: Dependency Bloat & Overlap Scanner

Demonstrates the branching / map-reduce pattern in Acorn.

The DependencyScanner orchestrator fans out one PackageAnalyzerBranch per
dependency (map phase), collects the PackageProfile results, groups packages
by overlapping purpose, and returns a PruningPlan (reduce phase).

Supports both requirements.txt (pip) and package.json (npm) formats.
"""

import re
from urllib.parse import quote

import requests
from pydantic import BaseModel, Field

from acorn import Module, tool


# ---------------------------------------------------------------------------
# Schemas
# ---------------------------------------------------------------------------


class PackageInput(BaseModel):
    # Input handed to PackageAnalyzerBranch: one dependency plus the
    # ecosystem it was declared in.
    package_name: str = Field(
        description="Package name as it appears in the dependency file",
    )
    ecosystem: str = Field(
        description="'pip' for PyPI packages, 'npm' for Node packages",
    )

class PackageProfile(BaseModel):
    # Result produced by PackageAnalyzerBranch for a single dependency.
    name: str = Field(
        description="Package name",
    )
    primary_purpose: str = Field(
        description="One-line description of what the package does",
    )
    categories: list[str] = Field(
        description="1-3 functional tags, e.g. ['http', 'networking']",
    )
    key_features: list[str] = Field(
        description="3-5 bullet points describing key features",
    )


class RedundancyGroup(BaseModel):
    # One cluster of packages that serve the same purpose, together with the
    # keep/remove recommendation for that cluster.
    purpose: str = Field(
        description="Shared purpose, e.g. 'HTTP client'",
    )
    packages: list[str] = Field(
        description="the packages in this group",
    )
    keep: str = Field(
        description="Recommended package to keep",
    )
    remove: list[str] = Field(
        description="Recommended packages to remove",
    )
    reason: str = Field(
        description="Why the recommended package wins",
    )


class DependencyFileInput(BaseModel):
    # Input handed to DependencyScanner: the raw text of the dependency file.
    file_content: str = Field(description="Contents of a requirements.txt or package.json file")

class PruningPlan(BaseModel):
    # Final result of DependencyScanner: a narrative summary, the detailed
    # redundancy groups, and a flat removal list.
    summary: str = Field(
        description="Narrative paragraph summarising findings and recommendations",
    )
    redundancy_groups: list[RedundancyGroup] = Field(
        description="Groups of overlapping packages",
    )
    packages_to_remove: list[str] = Field(
        description="Flat list of packages recommended for removal",
    )




# ---------------------------------------------------------------------------
# Tool (module-level so PackageAnalyzerBranch can reference it)
# ---------------------------------------------------------------------------


def _clean_package_name(package_name: str, *, is_pip: bool) -> str:
    """Reduce a dependency specifier to the bare registry name.

    Examples: ``requests==2.31.0`` -> ``requests``,
    ``requests[security]>=2 ; python_version<'3.9'`` -> ``requests``,
    ``axios@1.6.0`` -> ``axios``, ``@types/node@18.0.0`` -> ``@types/node``.

    Returns an empty string when no name is left after stripping.
    """
    name = package_name.strip()
    if is_pip:
        # Besides version operators, cut at extras ("["), environment markers
        # (";"), inline comments ("#") and any internal whitespace.
        return re.split(r"[=<>!@~^;\[#\s]", name, maxsplit=1)[0]

    # npm: a leading "@" marks a scoped package (@scope/name), not a version
    # pin, so it must survive the split on "@".
    scope = "@" if name.startswith("@") else ""
    bare = re.split(r"[=<>!@~^\s]", name[len(scope):], maxsplit=1)[0]
    return scope + bare if bare else ""


# NOTE(review): the docstring below is presumably surfaced to the model by
# @tool, so its wording is intentionally left unchanged — confirm against Acorn.
@tool
def fetch_package_info(package_name: str, ecosystem: str) -> dict:
    """Fetch README/description for a package from PyPI or the npm registry.

    Args:
        package_name: Package name as it appears in the dependency file
        ecosystem: "pip" for PyPI packages, "npm" for Node packages

    Returns:
        Dict with 'name', 'description', 'readme' (trimmed to 3 000 chars)
    """
    # Anything other than "pip" is treated as npm, as before; the comparison
    # is merely tolerant of case and surrounding whitespace.
    is_pip = ecosystem.strip().lower() == "pip"
    registry = "PyPI" if is_pip else "npm"

    # Strip version pins: requests==2.31.0 → requests, axios@1.6.0 → axios
    clean_name = _clean_package_name(package_name, is_pip=is_pip)
    not_found = {
        "name": clean_name,
        "description": f"Not found on {registry}.",
        "readme": "",
    }
    if not clean_name:
        # Nothing to look up; an empty name would otherwise hit the registry root.
        return not_found

    if is_pip:
        url = f"https://pypi.org/pypi/{quote(clean_name, safe='')}/json"
    else:
        # Scoped names are addressed as @scope%2Fname on the npm registry.
        url = f"https://registry.npmjs.org/{quote(clean_name, safe='@')}"

    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        data = resp.json()
    except requests.HTTPError:
        return not_found
    except (requests.RequestException, ValueError) as exc:
        # Timeouts, connection failures and invalid JSON bodies: report the
        # failure in the result instead of raising out of the tool.
        return {
            "name": clean_name,
            "description": f"Could not fetch package info from {registry}: {exc}",
            "readme": "",
        }

    if not isinstance(data, dict):
        data = {}

    if is_pip:
        info = data.get("info") or {}
        description = info.get("summary", "") or ""
        readme = info.get("description", "") or ""
    else:
        description = data.get("description", "") or ""
        readme = data.get("readme", "") or ""

    return {
        "name": clean_name,
        "description": description,
        "readme": readme[:3000],
    }


# ---------------------------------------------------------------------------
# Branch module — one instance per dependency
# ---------------------------------------------------------------------------


# System prompt assigned to PackageAnalyzerBranch.system_prompt. It names the
# fetch_package_info tool and the three PackageProfile fields to fill in.
BRANCH_PROMPT = """Analyze a single package and determine its primary purpose.

1. Call fetch_package_info with the package_name and ecosystem.
2. From the description and README, identify: primary_purpose (one clear
   sentence), categories (1-3 functional tags), and key_features
   (3-5 bullet points).
3. Finish immediately after fetching — do not loop.
"""


class PackageAnalyzerBranch(Module):
    """Analyze a single package and extract its purpose and features."""

    # Map-phase worker: takes a PackageInput and produces a PackageProfile.
    # NOTE(review): how each attribute below is interpreted is defined by
    # Acorn's Module base class, which is not visible here — confirm there.
    system_prompt = BRANCH_PROMPT
    model = "anthropic/claude-sonnet-4-6"
    temperature = 0.2
    max_steps = 5  # presumably an upper bound on agent steps — TODO confirm

    initial_input = PackageInput
    final_output = PackageProfile

    # fetch_package_info is the single entry in this branch's tool list.
    tools = [fetch_package_info]


# ---------------------------------------------------------------------------
# Orchestrator module
# ---------------------------------------------------------------------------

# System prompt assigned to DependencyScanner.system_prompt. It describes the
# parse -> per-dependency branch -> group -> PruningPlan workflow.
SYSTEM_PROMPT = """You are a dependency analyst specializing in finding redundant libraries.

Workflow:
1. Parse the file_content to extract all top-level dependency names.
   Detect the ecosystem: "npm" if it parses as JSON (package.json),
   "pip" if it looks like requirements.txt.

2. For EACH dependency use the PackageAnalyzerBranch branch to analyze each. You can analyze multiple in parallel.

3. After all branches finish, review the collected PackageProfile results.
   Group packages by overlapping primary_purpose / categories.

4. Identify RedundancyGroups — packages that do the same job.
   Recommend keeping the most popular/maintained one and removing the rest.

5. Call __finish__ with the complete PruningPlan.
"""


class DependencyScanner(Module):
    """Scan a requirements.txt or package.json for redundant dependencies."""

    # Orchestrator: takes a DependencyFileInput and produces a PruningPlan.
    # NOTE(review): how each attribute below is interpreted is defined by
    # Acorn's Module base class, which is not visible here — confirm there.
    system_prompt = SYSTEM_PROMPT
    model = "anthropic/claude-haiku-4-5"
    temperature = 0.2
    # Much higher than the branch's limit of 5; presumably sized so the
    # orchestrator can launch one branch per dependency — TODO confirm.
    max_steps = 60

    initial_input = DependencyFileInput
    final_output = PruningPlan

    # PackageAnalyzerBranch is the only branch type registered here.
    branches = [PackageAnalyzerBranch]