import json
import logging
import os
import re
from typing import Optional

from openai import OpenAI
|
|
|
|
| |
# Runtime configuration, read once from the process environment at import time.
# NVIDIA_API_KEY has no default: it is None when the variable is unset.
NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY")
# Base URL of the OpenAI-compatible endpoint; overridable via NIM_BASE_URL.
NIM_BASE_URL = os.environ.get("NIM_BASE_URL", "https://integrate.api.nvidia.com/v1")
# Model used when a caller does not pass one explicitly; overridable via NIM_MODEL.
NIM_MODEL_DEFAULT = os.environ.get("NIM_MODEL", "meta/llama-3.1-8b-instruct")
|
|
| |
# Matches a line that opens like a chatty LLM preface ("Okay, ...", "Sure, ...",
# "Here's ...", "Summary: ...", "Note ...", "Context: ...").
# Each alternative carries its own terminator. A shared trailing \b placed after
# punctuation (e.g. "okay," or "context:") can never match when whitespace
# follows, which let the most common prefaces slip through.
_PREFACE_RE = re.compile(
    r"^(?:(?:okay|sure|summary|note)\b|here(?:'|’)s\b|context:)",
    re.I,
)

# Matches a document-title keyword at the START of a line, optionally preceded
# by indentation and markdown heading/emphasis markers. Restricting the anchor
# to line starts stops ordinary sentences that merely mention "report",
# "summary" or "invoice" from truncating everything before them.
_ANCHOR_RE = re.compile(
    r"^[ \t]*(?:#+[ \t]*|[*_]+)?"
    r"(?:meeting\s*minutes|minutes\s*of\s*meeting|invoice|report|summary)\b",
    re.I | re.M,
)

# Captures the text between [[[DOC]]] and [[[/DOC]]]. The capture is greedy and
# dot matches newlines, so it spans from the first opening marker to the last
# closing marker.
_DOC_BLOCK_RE = re.compile(r"\[\[\[DOC\]\]\](.*)\[\[\[\/DOC\]\]\]", re.S)


def _sanitize_preface(text: str) -> str:
    """Strip leading LLM preface lines and trim to a title anchor if present.

    Processing steps:

    1. Leading whitespace is removed and the text is split into lines
       (line endings are normalised to ``\\n`` when re-joined).
    2. Leading lines matching ``_PREFACE_RE`` are dropped. Blank lines between
       such lines are skipped as well, so a preface spread over several
       paragraphs is removed completely.
    3. If a line starts with one of the ``_ANCHOR_RE`` keywords, everything
       before that line is discarded; the line itself, including any markdown
       heading markers, is kept.

    Args:
        text: Raw model output. ``None`` or an empty string is treated as "".

    Returns:
        The cleaned text with surrounding whitespace stripped; "" when nothing
        remains.
    """
    lines = (text or "").lstrip().splitlines()

    # Advance past preface lines (and blank lines separating them) without
    # mutating the list; stop at the first line carrying real content.
    first_kept = 0
    while first_kept < len(lines):
        candidate = lines[first_kept].strip()
        if candidate and not _PREFACE_RE.match(candidate):
            break
        first_kept += 1
    cleaned = "\n".join(lines[first_kept:]).lstrip()

    # The anchor pattern starts at the beginning of a line, so slicing at
    # match.start() keeps the whole title line.
    anchor = _ANCHOR_RE.search(cleaned)
    if anchor:
        cleaned = cleaned[anchor.start():]

    return cleaned.strip()
|
|
|
|
def _extract_marked_block(text: str) -> Optional[str]:
    """Return the text enclosed by [[[DOC]]] ... [[[/DOC]]], or None.

    A falsy ``text`` (``None`` or "") is searched as the empty string. When
    the markers are found, the enclosed text is returned with surrounding
    whitespace stripped (which may leave an empty string); otherwise the
    result is ``None``.
    """
    haystack = text if text else ""
    found = _DOC_BLOCK_RE.search(haystack)
    if found is None:
        return None
    inner = found.group(1)
    return inner.strip()
|
|
|
|
def enhance_with_nim(
    extracted_text: str,
    user_prompt: str,
    model: Optional[str] = None,
    timeout_s: int = 60,
) -> str:
    """Enhance a document via NVIDIA NIM (OpenAI-compatible Chat Completions).

    The model is instructed to answer with a JSON object of the form
    ``{"enhanced_text": "..."}``. The reply is interpreted as follows:

    * Valid JSON object with a non-empty string ``enhanced_text``: that string
      is used.
    * Valid JSON without a usable ``enhanced_text`` (missing, empty,
      non-string, or a non-object JSON value): treated as a failure and the
      original text is returned, instead of handing the raw JSON blob back as
      if it were the document.
    * Reply that is not JSON at all: the raw reply is used as text.

    The chosen text is then reduced to the ``[[[DOC]]] ... [[[/DOC]]]`` block
    when those markers are present, and passed through ``_sanitize_preface``.

    Args:
        extracted_text: The original document text.
        user_prompt: The user's editing instructions.
        model: Model name; falls back to ``NIM_MODEL_DEFAULT`` when falsy.
        timeout_s: Request timeout in seconds, forwarded to the API call.

    Returns:
        The enhanced text, or ``extracted_text`` unchanged when no API key is
        configured, the call or parsing fails, or the result is empty.
        Failures are logged rather than raised.
    """
    log = logging.getLogger(__name__)

    if not NVIDIA_API_KEY:
        # Without credentials the call cannot succeed; make the no-op visible.
        log.warning("NVIDIA_API_KEY is not set; returning the original text unchanged.")
        return extracted_text

    model_name = model or NIM_MODEL_DEFAULT

    system = (
        "You are a professional document editor. Edit and improve the provided document according to the user's "
        "instructions while preserving meaning, structure, headings, lists, and tone. "
        "Do not include any preface, summary, or explanation. "
        "Return only JSON with a single field 'enhanced_text'. "
        "If you add any extra commentary, it will be ignored.\n"
        "Optionally, also wrap the final edited document between markers [[[DOC]]] and [[[/DOC]]] "
        "if you must return any non-JSON content."
    )

    # Built line by line so the prompt text does not depend on source indentation.
    user = (
        "User instructions:\n"
        f"{user_prompt}\n"
        "\n"
        "Original document:\n"
        f"{extracted_text}\n"
    )

    try:
        # Client construction is inside the try so that it, too, honours the
        # "return the original text on any failure" contract.
        client = OpenAI(api_key=NVIDIA_API_KEY, base_url=NIM_BASE_URL)
        resp = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user},
            ],
            temperature=0.1,
            top_p=1.0,
            max_tokens=8192,
            response_format={"type": "json_object"},
            timeout=timeout_s,
        )

        content = (resp.choices[0].message.content or "").strip()

        try:
            payload = json.loads(content)
        except json.JSONDecodeError:
            # Not JSON: treat the raw reply as the candidate text.
            out = content
        else:
            candidate = payload.get("enhanced_text") if isinstance(payload, dict) else None
            if not (isinstance(candidate, str) and candidate.strip()):
                # The reply parsed as JSON but carries no usable text. Returning
                # the JSON source itself would corrupt the document.
                log.warning(
                    "NIM reply was valid JSON without a usable 'enhanced_text'; "
                    "returning the original text."
                )
                return extracted_text
            out = candidate.strip()

        # Prefer an explicitly marked document block when the model supplied one.
        block = _extract_marked_block(out)
        if block:
            out = block
        out = _sanitize_preface(out)

        return out or extracted_text

    except Exception:
        # Deliberate best-effort boundary: never propagate, but leave a trace.
        log.warning("NIM enhancement failed; returning the original text.", exc_info=True)
        return extracted_text
|
|
|
|
def enhance_doc(
    extracted_text: str,
    user_prompt: str,
    nim_model: Optional[str] = None,
) -> str:
    """Public entry point for document enhancement; NIM is the only backend.

    Forwards to ``enhance_with_nim``, passing ``nim_model`` as the model
    override and leaving the request timeout at that function's default.

    Returns:
        The enhanced text, or the original text when enhancement fails.
    """
    enhanced = enhance_with_nim(
        extracted_text,
        user_prompt,
        model=nim_model,
    )
    return enhanced
|
|