agent-forge-bak / text_summarizer.json
Chris4K's picture
Upload 14 files
988c7cc verified
{
"id": "text_summarizer",
"name": "Text Summarizer",
"version": "1.0.0",
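"description": "Summarize long text using the HuggingFace Inference API (facebook/bart-large-cnn); input sent to the API is truncated to the first 1024 words. Falls back to a simple extractive summary (the first three sentences longer than eight words) if the API request fails or returns no summary. No API key is required for basic use.",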
"author": "Chris4K",
"tags": ["nlp", "summarization", "text", "huggingface"],
"dependencies": ["requests"],
"schema": {
"input": {
"text": "str — text to summarize",
"max_length": "int — max tokens in summary (default: 130)",
"min_length": "int — min tokens in summary (default: 30)",
"hf_token": "str — optional HuggingFace API token for higher rate limits"
},
"output": {
"summary": "str",
"method": "str — 'hf_api' or 'extractive_fallback'",
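"original_length": "int — number of whitespace-separated words in the input text",
"summary_length": "int — number of whitespace-separated words in the summary"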
}
},
"code": "import re\nimport requests\nfrom typing import Optional\n\nHF_API_URL = \"https://api-inference.huggingface.co/models/facebook/bart-large-cnn\"\n\n\ndef _extractive_summary(text: str, sentences: int = 3) -> str:\n \"\"\"Simple extractive fallback: pick first N non-trivial sentences.\"\"\"\n raw = re.split(r\"(?<=[.!?])\\s+\", text.strip())\n clean = [s.strip() for s in raw if len(s.split()) > 8]\n return \" \".join(clean[:sentences])\n\n\ndef execute(\n text: str,\n max_length: int = 130,\n min_length: int = 30,\n hf_token: Optional[str] = None,\n) -> dict:\n \"\"\"Summarize text using HuggingFace BART or extractive fallback.\"\"\"\n if not text or not text.strip():\n return {\"error\": \"text cannot be empty\"}\n\n original_length = len(text.split())\n\n # Truncate very long texts for the API\n truncated_text = \" \".join(text.split()[:1024])\n\n headers = {\"Content-Type\": \"application/json\"}\n if hf_token:\n headers[\"Authorization\"] = f\"Bearer {hf_token}\"\n\n payload = {\n \"inputs\": truncated_text,\n \"parameters\": {\n \"max_length\": max_length,\n \"min_length\": min_length,\n \"do_sample\": False,\n },\n }\n\n try:\n r = requests.post(HF_API_URL, headers=headers, json=payload, timeout=30)\n if r.status_code == 200:\n result = r.json()\n if isinstance(result, list) and result:\n summary = result[0].get(\"summary_text\", \"\")\n if summary:\n return {\n \"summary\": summary,\n \"method\": \"hf_api\",\n \"original_length\": original_length,\n \"summary_length\": len(summary.split()),\n \"model\": \"facebook/bart-large-cnn\",\n }\n except Exception:\n pass\n\n # Fallback: extractive\n summary = _extractive_summary(text)\n return {\n \"summary\": summary,\n \"method\": \"extractive_fallback\",\n \"original_length\": original_length,\n \"summary_length\": len(summary.split()),\n \"note\": \"HF API unavailable; used extractive summarization\",\n }\n",
"downloads": 0,
"created_at": 1710000005
}