Spaces:
Running
Running
Update format_transplant.py
Browse files- format_transplant.py +126 -38
format_transplant.py
CHANGED
|
@@ -22,6 +22,7 @@ import argparse
|
|
| 22 |
import asyncio
|
| 23 |
import copy
|
| 24 |
import logging
|
|
|
|
| 25 |
import re
|
| 26 |
import shutil
|
| 27 |
import sys
|
|
@@ -93,6 +94,28 @@ logging.basicConfig(
|
|
| 93 |
)
|
| 94 |
logger = logging.getLogger("FormatTransplant")
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
# ============================================================================
|
| 97 |
# SEMANTIC CLASSIFICATION CONSTANTS
|
| 98 |
# ============================================================================
|
|
@@ -291,15 +314,15 @@ class LLMProvider(Enum):
|
|
| 291 |
|
| 292 |
# Per-provider defaults β base_url=None means the provider uses its own SDK
|
| 293 |
PROVIDER_DEFAULTS: Dict[str, Dict[str, Any]] = {
|
| 294 |
-
"openai": {"base_url": "https://api.openai.com/v1", "env": "OPENAI_API_KEY", "model": "gpt-4o"},
|
| 295 |
-
"anthropic": {"base_url": None, "env": "ANTHROPIC_API_KEY", "model": "claude-3-5-sonnet-20241022"},
|
| 296 |
-
"groq": {"base_url": "https://api.groq.com/openai/v1", "env": "GROQ_API_KEY", "model": "llama-3.3-70b-versatile"},
|
| 297 |
-
"nebius": {"base_url": "https://api.studio.nebius.com/v1", "env": "NEBIUS_API_KEY", "model": "meta-llama/Meta-Llama-3.1-70B-Instruct"},
|
| 298 |
-
"scaleway": {"base_url": "https://api.scaleway.ai/v1", "env": "SCW_SECRET_KEY", "model": "llama-3.3-70b-instruct"},
|
| 299 |
-
"openrouter": {"base_url": "https://openrouter.ai/api/v1", "env": "OPENROUTER_API_KEY","model": "meta-llama/llama-3.3-70b-instruct"},
|
| 300 |
-
"mistral": {"base_url": "https://api.mistral.ai/v1", "env": "MISTRAL_API_KEY", "model": "mistral-large-latest"},
|
| 301 |
-
"poe": {"base_url": None, "env": "POE_API_KEY", "model": "Claude-3.7-Sonnet"},
|
| 302 |
-
"ollama": {"base_url": "http://localhost:11434/api", "env": "OLLAMA_API_KEY", "model": "llama3.2"},
|
| 303 |
}
|
| 304 |
|
| 305 |
|
|
@@ -313,11 +336,11 @@ class LLMConfig:
|
|
| 313 |
max_tokens: int = 4096
|
| 314 |
temperature: float = 0.1 # low for deterministic formatting
|
| 315 |
# How many chars of blueprint text to send for styleguide generation (~10 K tokens)
|
| 316 |
-
blueprint_context_chars: int =
|
| 317 |
# Source paragraphs per LLM batch
|
| 318 |
para_batch_size: int = 15
|
| 319 |
# Retry settings
|
| 320 |
-
max_retries: int =
|
| 321 |
retry_delay_s: float = 5.0
|
| 322 |
|
| 323 |
|
|
@@ -346,6 +369,7 @@ def llm_config_from_args(
|
|
| 346 |
model=resolved_model or defaults.get("model", ""),
|
| 347 |
api_key=resolved_key,
|
| 348 |
base_url=defaults.get("base_url"),
|
|
|
|
| 349 |
)
|
| 350 |
|
| 351 |
|
|
@@ -1769,6 +1793,11 @@ class DocumentBuilder:
|
|
| 1769 |
for r in list(p_elem.findall(qn("w:r"))):
|
| 1770 |
if r not in marker_runs:
|
| 1771 |
p_elem.remove(r)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1772 |
for rd in parse_md_runs(llm_text):
|
| 1773 |
if not rd.text:
|
| 1774 |
continue
|
|
@@ -1790,22 +1819,22 @@ class DocumentBuilder:
|
|
| 1790 |
"[BUILD] LLM footnote id=%s para %d: '%.50s'",
|
| 1791 |
fd.footnote_id, para_idx, llm_text,
|
| 1792 |
)
|
| 1793 |
-
|
| 1794 |
-
|
| 1795 |
-
|
| 1796 |
-
|
| 1797 |
-
|
| 1798 |
-
|
| 1799 |
-
|
| 1800 |
-
|
| 1801 |
-
|
| 1802 |
-
|
| 1803 |
-
|
| 1804 |
-
|
| 1805 |
-
|
| 1806 |
-
rPr.remove(child)
|
| 1807 |
|
| 1808 |
# Ensure separator after marker matches blueprint convention
|
|
|
|
| 1809 |
self._normalize_fn_separator(p_elem)
|
| 1810 |
|
| 1811 |
fn_root.append(fn_xml)
|
|
@@ -1993,12 +2022,41 @@ class MultiProviderLLMClient:
|
|
| 1993 |
else:
|
| 1994 |
return self._openai_compat(system, user, config)
|
| 1995 |
except Exception as exc:
|
| 1996 |
-
|
| 1997 |
-
|
| 1998 |
-
|
| 1999 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2000 |
if attempt < config.max_retries:
|
| 2001 |
-
time.sleep(
|
| 2002 |
raise RuntimeError(
|
| 2003 |
f"[LLM] All {config.max_retries} attempts failed for {config.provider.value}"
|
| 2004 |
)
|
|
@@ -2172,9 +2230,6 @@ class MultiProviderLLMClient:
|
|
| 2172 |
text = resp.json().get("response", "")
|
| 2173 |
logger.debug("[LLM] Response: %d chars", len(text))
|
| 2174 |
return text
|
| 2175 |
-
text = resp.choices[0].message.content or ""
|
| 2176 |
-
logger.debug("[LLM] Response: %d chars", len(text))
|
| 2177 |
-
return text
|
| 2178 |
|
| 2179 |
# ββ Anthropic βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 2180 |
def _anthropic(self, system: str, user: str, config: LLMConfig) -> str:
|
|
@@ -2416,16 +2471,22 @@ def parse_md_runs(text: str) -> List["RunData"]:
|
|
| 2416 |
# ============================================================================
|
| 2417 |
|
| 2418 |
_FMT_SYSTEM = """\
|
| 2419 |
-
You are a scholarly editor applying a strict style guide to
|
| 2420 |
-
|
| 2421 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2422 |
|
| 2423 |
Use Markdown for inline formatting:
|
| 2424 |
*italic* for italic text
|
| 2425 |
**bold** for bold text
|
| 2426 |
***bold italic*** for bold + italic
|
| 2427 |
-
No other Markdown. Return plain paragraph text with inline markers only.
|
| 2428 |
-
|
| 2429 |
"""
|
| 2430 |
|
| 2431 |
_PARA_USER_TMPL = """\
|
|
@@ -2516,6 +2577,14 @@ class LLMContentFormatter:
|
|
| 2516 |
)
|
| 2517 |
|
| 2518 |
for batch_start in range(0, len(to_format), config.para_batch_size):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2519 |
batch = to_format[batch_start: batch_start + config.para_batch_size]
|
| 2520 |
texts = [p.get_text() for p in batch]
|
| 2521 |
|
|
@@ -2663,6 +2732,7 @@ class LLMFormatTransplanter:
|
|
| 2663 |
styleguide_out: Optional[Path] = None,
|
| 2664 |
llm_mode: str = "both",
|
| 2665 |
user_style_overrides: Optional[Dict[str, str]] = None,
|
|
|
|
| 2666 |
) -> Optional[Path]:
|
| 2667 |
"""
|
| 2668 |
Returns the path to the saved styleguide if styleguide_out was set,
|
|
@@ -2675,6 +2745,8 @@ class LLMFormatTransplanter:
|
|
| 2675 |
logger.info(" Output : %s", output_path)
|
| 2676 |
logger.info(" Provider : %s / %s", llm_config.provider.value, llm_config.model)
|
| 2677 |
logger.info(" LLM mode : %s", llm_mode)
|
|
|
|
|
|
|
| 2678 |
logger.info(" Batch size : %d Context chars: %d",
|
| 2679 |
llm_config.para_batch_size, llm_config.blueprint_context_chars)
|
| 2680 |
logger.info("β" * 60)
|
|
@@ -2712,6 +2784,19 @@ class LLMFormatTransplanter:
|
|
| 2712 |
extractor = ContentExtractor()
|
| 2713 |
body_elements, footnotes = extractor.extract(src_doc)
|
| 2714 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2715 |
# ββ Phase 2-LLM: LLM content formatting βββββββββββββββββββββββ
|
| 2716 |
formatter = LLMContentFormatter(client)
|
| 2717 |
llm_para_map: Dict[int, str] = {}
|
|
@@ -2841,6 +2926,8 @@ Debug tips:
|
|
| 2841 |
help="Blueprint chars to send for styleguide gen (default: 40000)")
|
| 2842 |
llm_group.add_argument("--llm-batch", type=int, default=15, metavar="N",
|
| 2843 |
help="Source paragraphs per LLM batch (default: 15)")
|
|
|
|
|
|
|
| 2844 |
|
| 2845 |
args = parser.parse_args()
|
| 2846 |
|
|
@@ -2889,6 +2976,7 @@ Debug tips:
|
|
| 2889 |
styleguide_out=sg_out,
|
| 2890 |
llm_mode=args.llm_mode,
|
| 2891 |
user_style_overrides=overrides,
|
|
|
|
| 2892 |
)
|
| 2893 |
except Exception as exc:
|
| 2894 |
logger.error("Fatal error: %s", exc, exc_info=True)
|
|
|
|
| 22 |
import asyncio
|
| 23 |
import copy
|
| 24 |
import logging
|
| 25 |
+
import os
|
| 26 |
import re
|
| 27 |
import shutil
|
| 28 |
import sys
|
|
|
|
| 94 |
)
|
| 95 |
logger = logging.getLogger("FormatTransplant")
|
| 96 |
|
| 97 |
+
def load_dotenv(path: Optional[Path] = None):
    """Load ``KEY=VALUE`` pairs from a .env file into ``os.environ``.

    A minimal stand-in for the python-dotenv package so the script carries
    no extra dependency.  Blank lines and ``#`` comment lines are skipped,
    a shell-style leading ``export `` is tolerated, and surrounding single
    or double quotes on the value are removed.

    Args:
        path: Location of the .env file.  Defaults to ``.env`` in the
            current working directory.  A missing file is silently ignored.
    """
    env_path = path or Path(".env")
    if not env_path.exists():
        return
    try:
        with open(env_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                # Skip blanks and full-line comments.
                if not line or line.startswith("#"):
                    continue
                # Tolerate shell-style "export KEY=VALUE" lines.
                if line.startswith("export "):
                    line = line[len("export "):].lstrip()
                if "=" in line:
                    key, _, value = line.partition("=")
                    key = key.strip()
                    # Guard against malformed "=value" lines: an empty key
                    # would otherwise be written into os.environ (and can
                    # raise ValueError on some platforms).
                    if not key:
                        continue
                    # Strip surrounding quotes if present.
                    value = value.strip().strip('"').strip("'")
                    os.environ[key] = value
    except Exception as e:
        # Best-effort loader: a broken .env must not abort the whole run.
        logger.warning(f"Failed to load .env: {e}")


# Load environment early so provider API keys are visible when the LLM
# configuration is resolved.
load_dotenv()
|
| 118 |
+
|
| 119 |
# ============================================================================
|
| 120 |
# SEMANTIC CLASSIFICATION CONSTANTS
|
| 121 |
# ============================================================================
|
|
|
|
| 314 |
|
| 315 |
# Per-provider defaults — base_url=None means the provider uses its own SDK
# rather than the generic OpenAI-compatible HTTP client.  Each entry carries:
#   base_url   : OpenAI-compatible endpoint, or None for SDK-based providers
#   env        : environment variable that holds the provider's API key
#   model      : default model identifier for the provider
#   batch_size : default source paragraphs per LLM batch (Groq is lower,
#                presumably because of its tighter rate limits — TODO confirm)
PROVIDER_DEFAULTS: Dict[str, Dict[str, Any]] = {
    "openai": {"base_url": "https://api.openai.com/v1", "env": "OPENAI_API_KEY", "model": "gpt-4o", "batch_size": 15},
    "anthropic": {"base_url": None, "env": "ANTHROPIC_API_KEY", "model": "claude-3-5-sonnet-20241022", "batch_size": 15},
    "groq": {"base_url": "https://api.groq.com/openai/v1", "env": "GROQ_API_KEY", "model": "llama-3.3-70b-versatile", "batch_size": 5},
    "nebius": {"base_url": "https://api.studio.nebius.com/v1", "env": "NEBIUS_API_KEY", "model": "meta-llama/Meta-Llama-3.1-70B-Instruct", "batch_size": 15},
    "scaleway": {"base_url": "https://api.scaleway.ai/v1", "env": "SCW_SECRET_KEY", "model": "llama-3.3-70b-instruct", "batch_size": 15},
    "openrouter": {"base_url": "https://openrouter.ai/api/v1", "env": "OPENROUTER_API_KEY","model": "meta-llama/llama-3.3-70b-instruct", "batch_size": 15},
    "mistral": {"base_url": "https://api.mistral.ai/v1", "env": "MISTRAL_API_KEY", "model": "mistral-large-latest", "batch_size": 15},
    "poe": {"base_url": None, "env": "POE_API_KEY", "model": "Claude-3.7-Sonnet", "batch_size": 15},
    "ollama": {"base_url": "http://localhost:11434/api", "env": "OLLAMA_API_KEY", "model": "llama3.2", "batch_size": 15},
}
|
| 327 |
|
| 328 |
|
|
|
|
| 336 |
max_tokens: int = 4096
|
| 337 |
temperature: float = 0.1 # low for deterministic formatting
|
| 338 |
# How many chars of blueprint text to send for styleguide generation (~10 K tokens)
|
| 339 |
+
blueprint_context_chars: int = 100_000
|
| 340 |
# Source paragraphs per LLM batch
|
| 341 |
para_batch_size: int = 15
|
| 342 |
# Retry settings
|
| 343 |
+
max_retries: int = 5
|
| 344 |
retry_delay_s: float = 5.0
|
| 345 |
|
| 346 |
|
|
|
|
| 369 |
model=resolved_model or defaults.get("model", ""),
|
| 370 |
api_key=resolved_key,
|
| 371 |
base_url=defaults.get("base_url"),
|
| 372 |
+
para_batch_size=defaults.get("batch_size", 15),
|
| 373 |
)
|
| 374 |
|
| 375 |
|
|
|
|
| 1793 |
for r in list(p_elem.findall(qn("w:r"))):
|
| 1794 |
if r not in marker_runs:
|
| 1795 |
p_elem.remove(r)
|
| 1796 |
+
|
| 1797 |
+
# Apply blueprint marker formatting to the preserved marker runs
|
| 1798 |
+
for r_marker in marker_runs:
|
| 1799 |
+
self._apply_fn_ref_style(r_marker)
|
| 1800 |
+
|
| 1801 |
for rd in parse_md_runs(llm_text):
|
| 1802 |
if not rd.text:
|
| 1803 |
continue
|
|
|
|
| 1819 |
"[BUILD] LLM footnote id=%s para %d: '%.50s'",
|
| 1820 |
fd.footnote_id, para_idx, llm_text,
|
| 1821 |
)
|
| 1822 |
+
else:
|
| 1823 |
+
# ββ Original run-clean path ββββββββββββββββββββββββ
|
| 1824 |
+
# Apply blueprint style to <w:footnoteRef> marker run;
|
| 1825 |
+
# strip source aesthetics from all other runs.
|
| 1826 |
+
for r_elem in p_elem.findall(qn("w:r")):
|
| 1827 |
+
fn_ref_check = _xpath(r_elem, ".//w:footnoteRef")
|
| 1828 |
+
if fn_ref_check:
|
| 1829 |
+
self._apply_fn_ref_style(r_elem)
|
| 1830 |
+
continue
|
| 1831 |
+
rPr = r_elem.find(qn("w:rPr"))
|
| 1832 |
+
if rPr is not None:
|
| 1833 |
+
for child in [c for c in rPr if c.tag not in KEEP_RPR_TAGS]:
|
| 1834 |
+
rPr.remove(child)
|
|
|
|
| 1835 |
|
| 1836 |
# Ensure separator after marker matches blueprint convention
|
| 1837 |
+
# (Re-applied even for LLM text to ensure tab preservation)
|
| 1838 |
self._normalize_fn_separator(p_elem)
|
| 1839 |
|
| 1840 |
fn_root.append(fn_xml)
|
|
|
|
| 2022 |
else:
|
| 2023 |
return self._openai_compat(system, user, config)
|
| 2024 |
except Exception as exc:
|
| 2025 |
+
is_rate_limit = False
|
| 2026 |
+
header_delay = None
|
| 2027 |
+
|
| 2028 |
+
# Try to extract retry-after from common SDK exceptions
|
| 2029 |
+
exc_str = str(exc).lower()
|
| 2030 |
+
if "429" in exc_str or "rate limit" in exc_str:
|
| 2031 |
+
is_rate_limit = True
|
| 2032 |
+
|
| 2033 |
+
# OpenAI / Groq / OpenRouter often put it in headers
|
| 2034 |
+
if hasattr(exc, "response") and hasattr(exc.response, "headers"):
|
| 2035 |
+
retry_after = exc.response.headers.get("retry-after")
|
| 2036 |
+
if retry_after and retry_after.isdigit():
|
| 2037 |
+
header_delay = float(retry_after)
|
| 2038 |
+
|
| 2039 |
+
# Exponential backoff: retry_delay * (2 ^ (attempt-1))
|
| 2040 |
+
delay = config.retry_delay_s * (2 ** (attempt - 1))
|
| 2041 |
+
|
| 2042 |
+
if header_delay:
|
| 2043 |
+
delay = max(delay, header_delay + 1.0) # Add 1s buffer
|
| 2044 |
+
elif is_rate_limit:
|
| 2045 |
+
delay *= 2 # Extra patience for rate limits
|
| 2046 |
+
|
| 2047 |
+
if is_rate_limit:
|
| 2048 |
+
logger.warning(
|
| 2049 |
+
"[LLM] %s rate limited (429). Waiting %.1f seconds... (Attempt %d/%d)",
|
| 2050 |
+
config.provider.value, delay, attempt, config.max_retries
|
| 2051 |
+
)
|
| 2052 |
+
else:
|
| 2053 |
+
logger.warning(
|
| 2054 |
+
"[LLM] %s attempt %d/%d failed: %s",
|
| 2055 |
+
config.provider.value, attempt, config.max_retries, exc,
|
| 2056 |
+
)
|
| 2057 |
+
|
| 2058 |
if attempt < config.max_retries:
|
| 2059 |
+
time.sleep(delay)
|
| 2060 |
raise RuntimeError(
|
| 2061 |
f"[LLM] All {config.max_retries} attempts failed for {config.provider.value}"
|
| 2062 |
)
|
|
|
|
| 2230 |
text = resp.json().get("response", "")
|
| 2231 |
logger.debug("[LLM] Response: %d chars", len(text))
|
| 2232 |
return text
|
|
|
|
|
|
|
|
|
|
| 2233 |
|
| 2234 |
# ββ Anthropic βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 2235 |
def _anthropic(self, system: str, user: str, config: LLMConfig) -> str:
|
|
|
|
| 2471 |
# ============================================================================
|
| 2472 |
|
| 2473 |
_FMT_SYSTEM = """\
|
| 2474 |
+
You are a scholarly editor applying a strict editorial style guide to existing text.
|
| 2475 |
+
Your ONLY task is to apply inline formatting (bold/italic) to the text provided.
|
| 2476 |
+
|
| 2477 |
+
CRITICAL CONSTRAINTS:
|
| 2478 |
+
1. DO NOT translate the text.
|
| 2479 |
+
2. DO NOT paraphrase or summarize the text.
|
| 2480 |
+
3. DO NOT add any introductory remarks, commentary, or conclusions.
|
| 2481 |
+
4. DO NOT change a single word or punctuation mark of the original text.
|
| 2482 |
+
5. REPRODUCE the text EXACTLY as given, only adding Markdown markers for formatting.
|
| 2483 |
|
| 2484 |
Use Markdown for inline formatting:
|
| 2485 |
*italic* for italic text
|
| 2486 |
**bold** for bold text
|
| 2487 |
***bold italic*** for bold + italic
|
| 2488 |
+
No other Markdown (no # headings, no lists). Return plain paragraph text with inline markers only.
|
| 2489 |
+
Return EXACTLY one formatted response for each input paragraph.
|
| 2490 |
"""
|
| 2491 |
|
| 2492 |
_PARA_USER_TMPL = """\
|
|
|
|
| 2577 |
)
|
| 2578 |
|
| 2579 |
for batch_start in range(0, len(to_format), config.para_batch_size):
|
| 2580 |
+
# Inter-batch delay to stay under rate limits
|
| 2581 |
+
if batch_start > 0:
|
| 2582 |
+
batch_delay = 2.0 # 2 seconds between batches
|
| 2583 |
+
if config.provider == LLMProvider.GROQ:
|
| 2584 |
+
batch_delay = 15.0 # Extra delay for Groq (very tight limits)
|
| 2585 |
+
logger.info("[LLM-FMT] Inter-batch delay: %.1fs...", batch_delay)
|
| 2586 |
+
time.sleep(batch_delay)
|
| 2587 |
+
|
| 2588 |
batch = to_format[batch_start: batch_start + config.para_batch_size]
|
| 2589 |
texts = [p.get_text() for p in batch]
|
| 2590 |
|
|
|
|
| 2732 |
styleguide_out: Optional[Path] = None,
|
| 2733 |
llm_mode: str = "both",
|
| 2734 |
user_style_overrides: Optional[Dict[str, str]] = None,
|
| 2735 |
+
debug_limit: Optional[int] = None,
|
| 2736 |
) -> Optional[Path]:
|
| 2737 |
"""
|
| 2738 |
Returns the path to the saved styleguide if styleguide_out was set,
|
|
|
|
| 2745 |
logger.info(" Output : %s", output_path)
|
| 2746 |
logger.info(" Provider : %s / %s", llm_config.provider.value, llm_config.model)
|
| 2747 |
logger.info(" LLM mode : %s", llm_mode)
|
| 2748 |
+
if debug_limit:
|
| 2749 |
+
logger.info(" Debug limit: %d paragraphs", debug_limit)
|
| 2750 |
logger.info(" Batch size : %d Context chars: %d",
|
| 2751 |
llm_config.para_batch_size, llm_config.blueprint_context_chars)
|
| 2752 |
logger.info("β" * 60)
|
|
|
|
| 2784 |
extractor = ContentExtractor()
|
| 2785 |
body_elements, footnotes = extractor.extract(src_doc)
|
| 2786 |
|
| 2787 |
+
# Apply debug limit if requested
|
| 2788 |
+
if debug_limit:
|
| 2789 |
+
count = 0
|
| 2790 |
+
limited_body = []
|
| 2791 |
+
for e in body_elements:
|
| 2792 |
+
limited_body.append(e)
|
| 2793 |
+
if e.semantic_class != "table":
|
| 2794 |
+
count += 1
|
| 2795 |
+
if count >= debug_limit:
|
| 2796 |
+
break
|
| 2797 |
+
body_elements = limited_body
|
| 2798 |
+
logger.info("Debug limit applied: only processing first %d body paragraphs.", debug_limit)
|
| 2799 |
+
|
| 2800 |
# ββ Phase 2-LLM: LLM content formatting βββββββββββββββββββββββ
|
| 2801 |
formatter = LLMContentFormatter(client)
|
| 2802 |
llm_para_map: Dict[int, str] = {}
|
|
|
|
| 2926 |
help="Blueprint chars to send for styleguide gen (default: 40000)")
|
| 2927 |
llm_group.add_argument("--llm-batch", type=int, default=15, metavar="N",
|
| 2928 |
help="Source paragraphs per LLM batch (default: 15)")
|
| 2929 |
+
llm_group.add_argument("--debug-limit", type=int, default=None, metavar="N",
|
| 2930 |
+
help="Process only first N paragraphs (for testing)")
|
| 2931 |
|
| 2932 |
args = parser.parse_args()
|
| 2933 |
|
|
|
|
| 2976 |
styleguide_out=sg_out,
|
| 2977 |
llm_mode=args.llm_mode,
|
| 2978 |
user_style_overrides=overrides,
|
| 2979 |
+
debug_limit=args.debug_limit,
|
| 2980 |
)
|
| 2981 |
except Exception as exc:
|
| 2982 |
logger.error("Fatal error: %s", exc, exc_info=True)
|