File size: 3,203 Bytes
988c7cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
{
  "id": "text_summarizer",
  "name": "Text Summarizer",
  "version": "1.0.0",
  "description": "Summarize long text using HuggingFace Inference API (facebook/bart-large-cnn). Falls back to extractive summarization if the API is unavailable. No API key required for basic use.",
  "author": "Chris4K",
  "tags": ["nlp", "summarization", "text", "huggingface"],
  "dependencies": ["requests"],
  "schema": {
    "input": {
      "text": "str — text to summarize",
      "max_length": "int — max tokens in summary (default: 130)",
      "min_length": "int — min tokens in summary (default: 30)",
      "hf_token": "str — optional HuggingFace API token for higher rate limits"
    },
    "output": {
      "summary": "str",
      "method": "str — 'hf_api' or 'extractive'",
      "original_length": "int",
      "summary_length": "int"
    }
  },
  "code": "import re\nimport requests\nfrom typing import Optional\n\nHF_API_URL = \"https://api-inference.huggingface.co/models/facebook/bart-large-cnn\"\n\n\ndef _extractive_summary(text: str, sentences: int = 3) -> str:\n    \"\"\"Simple extractive fallback: pick first N non-trivial sentences.\"\"\"\n    raw = re.split(r\"(?<=[.!?])\\s+\", text.strip())\n    clean = [s.strip() for s in raw if len(s.split()) > 8]\n    return \" \".join(clean[:sentences])\n\n\ndef execute(\n    text: str,\n    max_length: int = 130,\n    min_length: int = 30,\n    hf_token: Optional[str] = None,\n) -> dict:\n    \"\"\"Summarize text using HuggingFace BART or extractive fallback.\"\"\"\n    if not text or not text.strip():\n        return {\"error\": \"text cannot be empty\"}\n\n    original_length = len(text.split())\n\n    # Truncate very long texts for the API\n    truncated_text = \" \".join(text.split()[:1024])\n\n    headers = {\"Content-Type\": \"application/json\"}\n    if hf_token:\n        headers[\"Authorization\"] = f\"Bearer {hf_token}\"\n\n    payload = {\n        \"inputs\": truncated_text,\n        \"parameters\": {\n            \"max_length\": max_length,\n            \"min_length\": min_length,\n            \"do_sample\": False,\n        },\n    }\n\n    try:\n        r = requests.post(HF_API_URL, headers=headers, json=payload, timeout=30)\n        if r.status_code == 200:\n            result = r.json()\n            if isinstance(result, list) and result:\n                summary = result[0].get(\"summary_text\", \"\")\n                if summary:\n                    return {\n                        \"summary\": summary,\n                        \"method\": \"hf_api\",\n                        \"original_length\": original_length,\n                        \"summary_length\": len(summary.split()),\n                        \"model\": \"facebook/bart-large-cnn\",\n                    }\n    except Exception:\n        pass\n\n    # Fallback: extractive\n    summary = _extractive_summary(text)\n    return {\n        \"summary\": summary,\n        \"method\": \"extractive_fallback\",\n        \"original_length\": original_length,\n        \"summary_length\": len(summary.split()),\n        \"note\": \"HF API unavailable; used extractive summarization\",\n    }\n",
  "downloads": 0,
  "created_at": 1710000005
}