# (Removed: "Spaces: Running" — Hugging Face Spaces UI status banner captured
#  by the page scrape; it was never part of this source file.)
"""
Moltbot Hybrid Engine - Production v7.0.0
Multi-service: FastAPI + Ollama (optional) + OpenClaw/Clawdbot gateway (proxied at /gateway)
Runs on Hugging Face Spaces
Build: 2026-02-14 — Clawdbot installed in Space; gateway on 18789, proxied at /gateway
Endpoints:
    GET  /               - Health check
    GET  /health         - Detailed health status
    GET  /gateway        - OpenClaw/Clawdbot Control UI (reverse proxy to gateway :18789)
    GET  /gateway/{path} - OpenClaw proxy (path)
    POST /api/generate   - LLM text generation
    ...
"""
| import os | |
| import re | |
| import json | |
| import subprocess | |
| import logging | |
| from pathlib import Path | |
| from fastapi import FastAPI, HTTPException, Header, UploadFile, File, Request | |
| from fastapi.responses import StreamingResponse, Response | |
| from pydantic import BaseModel | |
| from typing import List, Optional, Dict, Any, Union | |
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("moltbot-engine")  # shared module-wide logger
# Initialize App
app = FastAPI(
    title="Moltbot Hybrid Engine",
    description="AI agent for legal document processing - Dual LLM + file matching + Clawdbot gateway at /gateway",
    version="7.0.0"
)
# API Key for authentication.
# The sentinel "default_insecure_key" doubles as a feature flag: gateway_verify
# skips auth and chat_completions accepts any non-empty key while it is in use.
API_KEY = os.environ.get("MOLTBOT_API_KEY", "default_insecure_key")
if API_KEY == "default_insecure_key":
    logger.warning("MOLTBOT_API_KEY not set. Using insecure default.")
# HuggingFace token for Inference API (empty string means anonymous access)
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Default HF model for inference API fallback
HF_MODEL = os.environ.get("HF_MODEL", "Qwen/Qwen2.5-7B-Instruct")
| # ============================================================ | |
| # DATA MODELS | |
| # ============================================================ | |
class GenerateRequest(BaseModel):
    """Body for POST /api/generate."""
    # Ollama model tag tried first; the HF fallback ignores this and uses HF_MODEL.
    model: str = "qwen2.5:1.5b"
    prompt: str
    stream: bool = False  # accepted but /api/generate always responds non-streaming
class FileSearchRequest(BaseModel):
    """Body for the fuzzy file-search endpoint."""
    missing_filename: str  # filename we are trying to locate
    available_files: List[str]  # candidate filenames to match against
    context: Optional[str] = None  # free-text context; not read by search_file currently
class FileSearchResponse(BaseModel):
    """Result of a fuzzy file search."""
    status: str  # "success" or "error"
    missing_filename: str  # echoed (truncated to 50 chars + "..." on the too-long error path)
    suggestions: List[Dict[str, Any]]  # entries carry filename, score, match_type
    confidence: float  # score of the best suggestion; 0.0 when none
    reasoning: str  # human-readable explanation of the outcome
class AnalysisRequest(BaseModel):
    """Body for report analysis via JSON."""
    report_data: Dict[str, Any]  # parsed verification-report JSON
class AnalysisResponse(BaseModel):
    """Summary returned by report analysis."""
    status: str
    critical_issues: int  # missing files + structure issues + blank placeholder pages
    suggestions: List[str]
| # ============================================================ | |
| # HELPER FUNCTIONS | |
| # ============================================================ | |
def tokenize(text: str) -> set:
    """Break *text* into a set of meaningful lowercase tokens.

    Separator characters (underscores, hyphens, dots, brackets) are turned
    into spaces, then common file extensions and English stopwords are
    discarded so only the discriminating words remain.
    """
    stopwords = {'pdf', 'mp4', 'jpg', 'jpeg', 'png', 'gif', 'doc', 'docx',
                 'the', 'and', 'to', 'of', 'in', 'a', 'for', 'with', 'on'}
    normalized = re.sub(r'[_\-\.\(\)\[\]]', ' ', text.lower())
    return {tok for tok in normalized.split() if tok not in stopwords}
def calculate_match_score(wanted: set, found: set) -> float:
    """Fraction of *wanted* tokens that also appear in *found*, in [0.0, 1.0]."""
    return len(wanted & found) / len(wanted) if wanted else 0.0
def find_best_matches(missing_filename: str, available_files: List[str], max_results: int = 5) -> List[Dict[str, Any]]:
    """Rank candidate filenames by similarity to *missing_filename*.

    Exact matches score 1.0, case-insensitive matches 0.99, and token-overlap
    candidates keep their rounded overlap score (kept only when >= 0.5).
    Returns at most *max_results* entries, best first. Empty when the target
    filename yields no usable tokens.
    """
    wanted = tokenize(missing_filename)
    if not wanted:
        return []
    ranked = []
    for candidate in available_files:
        if candidate == missing_filename:
            entry = {"filename": candidate, "score": 1.0, "match_type": "exact"}
        elif candidate.lower() == missing_filename.lower():
            entry = {"filename": candidate, "score": 0.99, "match_type": "case_insensitive"}
        else:
            overlap = calculate_match_score(wanted, tokenize(candidate))
            if overlap < 0.5:
                continue
            entry = {"filename": candidate, "score": round(overlap, 3), "match_type": "token_match"}
        ranked.append(entry)
    ranked.sort(key=lambda m: m["score"], reverse=True)
    return ranked[:max_results]
def check_ollama_status() -> dict:
    """Probe the local Ollama daemon via `ollama list`.

    Returns {"running": True, "models": [...]} on success, otherwise
    {"running": False, "error": <reason>}. Never raises.
    """
    try:
        proc = subprocess.run(
            ["ollama", "list"],
            capture_output=True, text=True, timeout=10
        )
    except FileNotFoundError:
        return {"running": False, "error": "ollama binary not found"}
    except OSError as exc:
        return {"running": False, "error": f"ollama exec error: {exc}"}
    except subprocess.TimeoutExpired:
        return {"running": False, "error": "ollama list timed out"}
    except Exception as exc:
        return {"running": False, "error": str(exc)}
    if proc.returncode != 0:
        return {"running": False, "error": proc.stderr.strip()}
    # First stdout line is the table header; first column of each row is the model name.
    rows = proc.stdout.strip().split('\n')[1:]
    names = [row.split()[0] for row in rows if row.strip()]
    return {"running": True, "models": names}
def generate_with_ollama(model: str, prompt: str) -> Optional[str]:
    """Try to generate text with a local Ollama server. Returns None if unavailable.

    POSTs to the Ollama HTTP API on localhost:11434 with streaming disabled
    and a 16K context window. Callers treat None as "fall back to the HF
    Inference API".

    Args:
        model: Ollama model tag, e.g. "qwen2.5:1.5b".
        prompt: Full prompt text to generate from.
    """
    log = logging.getLogger("moltbot-engine")  # same logger instance as the module-level one
    # Import separately so a missing `requests` degrades to the HF fallback.
    # (Previously the import sat inside the main try-block and the except
    # clauses referenced `req.exceptions.*`, which raised NameError whenever
    # the import itself failed.)
    try:
        import requests as req
    except ImportError as e:
        log.warning(f"[OLLAMA] requests not installed: {e}")
        return None
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {
            "num_ctx": 16384  # large context window for long legal documents
        }
    }
    try:
        resp = req.post("http://localhost:11434/api/generate", json=payload, timeout=120)
        if resp.status_code == 200:
            data = resp.json()
            text = data.get("response", "").strip()
            if text:
                return text
        # Reached on non-200 status or an empty/whitespace-only completion.
        log.warning(f"[OLLAMA] Status {resp.status_code}: {resp.text[:200]}")
        return None
    except req.exceptions.ConnectionError:
        log.warning("[OLLAMA] Not available (connection refused)")
        return None
    except req.exceptions.Timeout:
        log.warning("[OLLAMA] Timeout after 120s")
        return None
    except Exception as e:
        log.warning(f"[OLLAMA] Error: {e}")
        return None
def generate_with_hf_api(prompt: str, model: Optional[str] = None) -> Optional[str]:
    """Generate text using HuggingFace Inference API (free, no GPU needed).
    Uses chat_completion (conversational) — the only supported task for Qwen2.5-7B-Instruct.

    Args:
        prompt: Sent as a single user message after a fixed legal-assistant
            system message.
        model: HF model id override; defaults to the HF_MODEL env setting.

    Returns:
        The stripped completion text, or None on any failure (missing
        huggingface_hub, API error, or empty completion) so callers can try
        another backend.
    """
    try:
        from huggingface_hub import InferenceClient
        hf_model = model or HF_MODEL
        # Anonymous access works but is rate-limited; pass the token when set.
        token = HF_TOKEN if HF_TOKEN else None
        client = InferenceClient(token=token)
        # Use chat_completion — Qwen2.5-7B-Instruct is conversational only
        # (text_generation fails: "Supported task: conversational")
        response = client.chat_completion(
            model=hf_model,
            messages=[
                {"role": "system", "content": "You are a helpful legal assistant."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=1024,
            temperature=0.7,
        )
        if response and response.choices:
            text = response.choices[0].message.content
            if text:
                return text.strip()
        logger.warning("[HF_API] Empty response from chat_completion")
        return None
    except ImportError:
        logger.error("[HF_API] huggingface_hub not installed")
        return None
    except Exception as e:
        logger.warning(f"[HF_API] chat_completion error: {e}")
        return None
def analyze_report_data(data: dict) -> dict:
    """Scan a verification-report dict and summarise the problems found.

    Missing files, bundle page-structure issues and blank placeholder pages
    each count as critical issues; one human-readable suggestion is emitted
    per finding, plus one per bundle that reports missing files.
    """
    findings = []
    missing = data.get("missing_total", 0)
    structure = data.get("page_structure_analysis", {}).get("bundles_with_structure_issues", 0)
    blanks = data.get("blank_placeholder_pages", 0)
    if missing > 0:
        findings.append(f"{missing} files missing - check paths in proven_paths_index.json")
    if structure > 0:
        findings.append(f"{structure} bundles have page structure problems - check embedding logic")
    if blanks > 0:
        findings.append(f"{blanks} blank placeholder pages found - files listed in TOC but not embedded")
    bundles = data.get("bundles", {})
    for name, info in bundles.items():
        if not isinstance(info, dict):
            continue
        n_missing = info.get("missing_count", 0)
        if n_missing > 0:
            findings.append(f"Bundle {name}: {n_missing} files missing")
    critical = missing + structure + blanks
    return {
        "status": "success",
        "critical_issues": critical,
        "suggestions": findings or ["No critical issues found"],
        "summary": f"Analyzed report: {critical} critical issues across {len(bundles)} bundles"
    }
| # ============================================================ | |
| # API ENDPOINTS | |
| # ============================================================ | |
def health_check():
    """Basic health check: service identity plus backend availability."""
    return {
        "status": "running",
        "service": "Moltbot Hybrid Engine",
        "version": "7.0.0",
        "ollama": check_ollama_status(),
        "clawdbot": "OpenClaw gateway proxied at /gateway (if running)",
        "hf_inference_api": {
            "available": True,
            "model": HF_MODEL,
            "token_set": bool(HF_TOKEN)
        }
    }
def detailed_health():
    """Detailed health check with per-backend LLM status and endpoint list."""
    ollama_status = check_ollama_status()
    backends = {
        "ollama": {
            "running": ollama_status.get("running", False),
            "models": ollama_status.get("models", []),
            "error": ollama_status.get("error"),
        },
        "hf_inference_api": {
            "available": True,
            "model": HF_MODEL,
            "token_set": bool(HF_TOKEN),
            "note": "Always available as fallback, no GPU needed"
        }
    }
    return {
        "status": "healthy",
        "service": "moltbot-hybrid-engine",
        "version": "7.0.0",
        "llm_backends": backends,
        "endpoints": ["/", "/health", "/api/generate", "/api/search",
                      "/api/analyze", "/api/extract_date", "/tools/analyze_report",
                      "/v1/chat/completions", "/v1/models", "/gateway (Clawdbot UI)"]
    }
def security_info():
    """Report the sandbox/security posture of this deployment."""
    restricted_actions = ["Read local files", "Write local files", "Delete files",
                          "Access host filesystem", "Execute arbitrary commands"]
    return {
        "file_access": False,
        "network_access": "API only",
        "isolation": "Hugging Face container",
        "cannot_do": restricted_actions,
    }
| # ============================================================ | |
| # CLAWDBOT /gateway/verify — Direct verification endpoint | |
| # Called by CoV hook (cov_integration_hook.py step 6) | |
| # NOT proxied to OpenClaw; runs LLM verification directly | |
| # ============================================================ | |
class GatewayVerifyRequest(BaseModel):
    """Body for the /gateway/verify endpoint (sent by the CoV hook)."""
    edit_text: str  # text of the edit to verify; blank text is blocked outright
    local_cov_results: Optional[Dict[str, Any]] = None  # scores/critical_issues/warnings from local checks
    exhibit_schedule: Optional[str] = None  # Authoritative exhibit schedule from build_exhibit_briefcase.py
class GatewayVerifyResponse(BaseModel):
    """Structured verdict returned by /gateway/verify."""
    verdict: str  # "pass", "flag", "block"
    reasoning: str  # one-paragraph explanation (or raw LLM output when parsing fails)
    issues: List[str]  # specific problems found; empty when verdict is "pass"
async def gateway_verify(request: GatewayVerifyRequest, x_api_key: str = Header(None)):
    """
    ClawdBot verification endpoint. Receives edit text + local CoV results,
    runs LLM verification with legal exhibit instruction + evidence context,
    and returns verdict (pass/flag/block), reasoning, and issues list.
    Called by courtBundleGenerator3/adapters/cov_integration_hook.py (step 6).

    Degrades gracefully: when both LLM backends are unavailable, or the LLM
    reply cannot be parsed as JSON, the verdict is "flag" for human review.
    """
    # Auth: accept API key or allow if default key
    # (auth is intentionally skipped while API_KEY is still the insecure default)
    if API_KEY != "default_insecure_key":
        if not x_api_key or x_api_key != API_KEY:
            raise HTTPException(status_code=401, detail="Invalid or missing API Key")
    edit_text = request.edit_text or ""
    local_results = request.local_cov_results or {}
    if not edit_text.strip():
        return GatewayVerifyResponse(verdict="block", reasoning="Empty edit text", issues=["No text provided"])
    # Build verification prompt with legal exhibit instruction + evidence context
    legal_instruction = _get_legal_exhibit_instruction()
    # Include exhibit schedule if provided (authoritative mapping from build_exhibit_briefcase.py)
    exhibit_schedule_section = ""
    if request.exhibit_schedule:
        # Truncated to 6000 chars to keep the prompt within context limits.
        exhibit_schedule_section = f"""
---
AUTHORITATIVE EXHIBIT SCHEDULE (from actual bundle output — use this to resolve all references):
{request.exhibit_schedule[:6000]}
"""
    # Summarise local CoV results so the LLM has context
    local_summary_parts = []
    scores = local_results.get("scores", {})
    if scores:
        local_summary_parts.append(f"Local CoV scores: {json.dumps(scores)}")
    critical = local_results.get("critical_issues", [])
    if critical:
        local_summary_parts.append(f"Critical issues from local checks: {critical}")
    warnings = local_results.get("warnings", [])
    if warnings:
        # Only the first five warnings are forwarded to keep the prompt short.
        local_summary_parts.append(f"Warnings: {warnings[:5]}")
    local_summary = "\n".join(local_summary_parts) if local_summary_parts else "No local CoV results provided."
    # Edit text is truncated to 8000 chars for the same context-limit reason.
    verify_prompt = f"""You are ClawdBot, a legal document verification agent.
{legal_instruction}
{exhibit_schedule_section}
---
LOCAL COV RESULTS (from timeline, accuracy, prosecutor, self-healing, Qwen checks):
{local_summary}
---
EDIT TEXT TO VERIFY:
{edit_text[:8000]}
---
TASK: Review the edit text above. Check:
1. Every evidence reference uses the format: Exhibit [Letter][Number] (DB-[N]) — [Filename]. Flag any bare DB-[N] or missing exhibit refs.
Use the EXHIBIT SCHEDULE above to verify that the Letter+Number matches the correct DB number and filename.
Flag any Exhibit [L][N] placeholders — these MUST be replaced with real letters from the schedule.
2. No sentences are cut off mid-word or mid-sentence (truncation).
3. Legal claims are supported by cited evidence. Flag unsupported assertions.
4. Dates, names, and case references are consistent.
5. No placeholder text like [●], [L][N], TBC, or TODO remains.
Respond with EXACTLY this JSON (no markdown, no extra text):
{{"verdict": "pass" or "flag" or "block", "reasoning": "one paragraph explanation", "issues": ["issue 1", "issue 2"]}}
If all checks pass, verdict is "pass" with empty issues list.
If minor issues found, verdict is "flag".
If critical issues (truncation, missing exhibit refs, unsupported claims, [L][N] placeholders), verdict is "block"."""
    # Run through LLM (same dual backend as /api/generate)
    response_text = None
    # Try Ollama first
    response_text = generate_with_ollama("qwen2.5:1.5b", verify_prompt)
    # Fallback to HF Inference API
    if not response_text:
        response_text = generate_with_hf_api(verify_prompt)
    if not response_text:
        return GatewayVerifyResponse(
            verdict="flag",
            reasoning="LLM backends unavailable; cannot verify. Passing as flag for human review.",
            issues=["LLM unavailable — manual review required"]
        )
    # Parse LLM response into structured verdict
    try:
        # Try to extract JSON from response (LLM may wrap it in markdown)
        # NOTE: [^{}]* only matches a flat (non-nested) JSON object; a nested
        # object inside "issues" would defeat this regex and hit the fallback.
        json_match = re.search(r'\{[^{}]*"verdict"[^{}]*\}', response_text, re.DOTALL)
        if json_match:
            parsed = json.loads(json_match.group())
            verdict = parsed.get("verdict", "flag")
            if verdict not in ("pass", "flag", "block"):
                verdict = "flag"  # never trust an out-of-vocabulary verdict
            reasoning = parsed.get("reasoning", response_text[:300])
            issues = parsed.get("issues", [])
            if not isinstance(issues, list):
                issues = [str(issues)]
            return GatewayVerifyResponse(verdict=verdict, reasoning=reasoning, issues=issues)
    except (json.JSONDecodeError, AttributeError):
        pass
    # Fallback: couldn't parse JSON — return the raw response as a flag
    return GatewayVerifyResponse(
        verdict="flag",
        reasoning=response_text[:500],
        issues=["LLM response could not be parsed as structured JSON — manual review recommended"]
    )
# OpenClaw/Clawdbot gateway reverse proxy (gateway runs on 18789; Space exposes single port 7860)
OPENCLAW_GATEWAY_URL = "http://127.0.0.1:18789"
async def proxy_openclaw_gateway(request: Request, path: str = ""):
    """Proxy requests to OpenClaw/Clawdbot gateway so Control UI and WebChat are reachable at /gateway.

    Forwards method, headers (minus Host/Connection), query string and body to
    the local gateway, then relays the upstream response with
    Transfer-Encoding/Connection headers stripped. Returns 503 when httpx is
    missing or the gateway is down, 502 on any other proxy error.
    """
    try:
        import httpx
    except ImportError:
        raise HTTPException(status_code=503, detail="httpx not installed; cannot proxy to Clawdbot gateway")
    # Strip the /gateway mount prefix so the gateway sees root-relative paths.
    target_path = request.url.path
    if target_path.startswith("/gateway"):
        target_path = target_path[8:] or "/"  # strip /gateway -> / or /foo
    target = f"{OPENCLAW_GATEWAY_URL}{target_path}"
    if request.url.query:
        target += "?" + request.url.query
    # Drop Host/Connection; httpx sets its own Host for the upstream target.
    headers = {k: v for k, v in request.headers.raw if k.lower() not in (b"host", b"connection")}
    try:
        body = await request.body()
    except Exception:
        body = b""
    async with httpx.AsyncClient(timeout=30.0) as client:
        try:
            r = await client.request(
                request.method,
                target,
                headers=headers,
                content=body,
            )
        except httpx.ConnectError:
            return Response(
                content="Clawdbot gateway not reachable (is it running on 18789?). Start the Space and try again.",
                status_code=503,
                media_type="text/plain",
            )
        except Exception as e:
            logger.warning(f"[GATEWAY PROXY] {e}")
            return Response(content=str(e), status_code=502, media_type="text/plain")
    # Relay the upstream response minus headers the ASGI layer must control.
    out_headers = {}
    for k, v in r.headers.items():
        if k.lower() not in ("transfer-encoding", "connection"):
            out_headers[k] = v
    return Response(
        content=r.content,
        status_code=r.status_code,
        headers=out_headers,
        media_type=r.headers.get("content-type", "application/octet-stream"),
    )
# Legal document exhibit reference instruction — injected into every generate/chat so edit sources always get it
_prompts_dir = Path(__file__).resolve().parent / "prompts"
# Primary instruction text (required citation format for exhibits).
_LEGAL_EXHIBIT_PROMPT_PATH = _prompts_dir / "legal_exhibit_instruction.txt"
# Optional full evidence list, appended to the instruction when the file exists.
_FULL_EVIDENCE_REF_PATH = _prompts_dir / "full_evidence_reference.txt"
# Lazily-populated cache; None until _get_legal_exhibit_instruction() first runs.
_LEGAL_EXHIBIT_INSTRUCTION_CACHED: Optional[str] = None
def _get_legal_exhibit_instruction() -> str:
    """Load legal exhibit instruction once; append full evidence list when present. Injected into all LLM requests."""
    global _LEGAL_EXHIBIT_INSTRUCTION_CACHED
    if _LEGAL_EXHIBIT_INSTRUCTION_CACHED is not None:
        return _LEGAL_EXHIBIT_INSTRUCTION_CACHED
    # Base instruction: the on-disk prompt wins, otherwise a built-in one-liner.
    if _LEGAL_EXHIBIT_PROMPT_PATH.exists():
        instruction = _LEGAL_EXHIBIT_PROMPT_PATH.read_text(encoding="utf-8", errors="replace")
    else:
        instruction = "When referencing evidence use Exhibit [Letter][Number] (DB-[N]) — [Filename]. Do not use bare DB-[●]."
    # Best-effort append of the full evidence reference list.
    if _FULL_EVIDENCE_REF_PATH.exists():
        try:
            evidence = _FULL_EVIDENCE_REF_PATH.read_text(encoding="utf-8", errors="replace").strip()
        except Exception:
            evidence = ""
        if evidence:
            instruction = instruction + "\n\nFull evidence list (use for every cite):\n" + evidence
    _LEGAL_EXHIBIT_INSTRUCTION_CACHED = instruction
    return instruction
def get_legal_exhibit_instruction():
    """Return the legal exhibit referencing instruction. Also injected automatically into /api/generate and /v1/chat/completions."""
    instruction_text = _get_legal_exhibit_instruction()
    return {"instruction": instruction_text}
| # --- LLM Generation (Dual Backend: Ollama → HF Inference API) --- | |
async def generate(request: GenerateRequest, x_api_key: str = Header(None)):
    """Generate text using LLM. Tries Ollama first, falls back to HF Inference API.
    Legal exhibit instruction is prepended so all edits/amendments use Exhibit [Letter][Number] (DB-[N]) — [Filename].

    Raises 401 on a bad/missing key and 503 when both backends fail.
    """
    if not x_api_key or x_api_key != API_KEY:
        raise HTTPException(status_code=401, detail="Invalid or missing API Key")
    # Inject legal exhibit instruction so edit sources always get the rule
    full_prompt = _get_legal_exhibit_instruction() + "\n\n---\n\n" + request.prompt
    logger.info(f"[GENERATE] model={request.model}, prompt_len={len(full_prompt)}")
    # Backend 1: local Ollama
    backend_used = "ollama"
    response_text = generate_with_ollama(request.model, full_prompt)
    if response_text:
        logger.info(f"[GENERATE] Ollama success, response_len={len(response_text)}")
    else:
        # Backend 2: HF Inference API fallback
        logger.info("[GENERATE] Ollama unavailable, trying HF Inference API...")
        response_text = generate_with_hf_api(full_prompt)
        if response_text:
            backend_used = "hf_inference_api"
            logger.info(f"[GENERATE] HF API success, response_len={len(response_text)}")
        else:
            # Both backends failed
            raise HTTPException(
                status_code=503,
                detail="Both LLM backends unavailable. Ollama not running + HF Inference API failed. Check HF_TOKEN."
            )
    return {
        "model": request.model,
        "response": response_text,
        "backend": backend_used,
        "done": True
    }
| # --- File Search --- | |
async def search_file(request: FileSearchRequest, x_api_key: str = Header(None)):
    """Fuzzy file matching for missing evidence files."""
    if not x_api_key or x_api_key != API_KEY:
        raise HTTPException(status_code=401, detail="Invalid or missing API Key")
    # Guard against pathological input (usually several filenames glued together).
    if len(request.missing_filename) > 200:
        return FileSearchResponse(
            status="error", missing_filename=request.missing_filename[:50] + "...",
            suggestions=[], confidence=0.0,
            reasoning="Filename too long - likely concatenated filenames"
        )
    matches = find_best_matches(request.missing_filename, request.available_files)
    if not matches:
        confidence = 0.0
        reasoning = f"No matches found in {len(request.available_files)} files"
    else:
        confidence = matches[0]["score"]
        if matches[0]["match_type"] == "exact":
            reasoning = f"Exact match: {matches[0]['filename']}"
        else:
            reasoning = f"Token match with {int(confidence * 100)}% similarity"
    return FileSearchResponse(
        status="success", missing_filename=request.missing_filename,
        suggestions=matches, confidence=confidence, reasoning=reasoning
    )
| # --- Report Analysis via JSON body --- | |
async def analyze_report_json(request: AnalysisRequest, x_api_key: str = Header(None)):
    """Analyze a verification report (JSON body)."""
    if not x_api_key or x_api_key != API_KEY:
        raise HTTPException(status_code=401, detail="Invalid or missing API Key")
    analysis = analyze_report_data(request.report_data)
    return AnalysisResponse(
        status=analysis["status"],
        critical_issues=analysis["critical_issues"],
        suggestions=analysis["suggestions"],
    )
| # --- Report Analysis via file upload --- | |
async def analyze_report_upload(
    report_file: UploadFile = File(...),
    x_api_key: str = Header(None)
):
    """Analyze a verification report uploaded as a file (400 on bad JSON)."""
    if not x_api_key or x_api_key != API_KEY:
        raise HTTPException(status_code=401, detail="Invalid or missing API Key")
    logger.info(f"[ANALYZE_REPORT] Received file: {report_file.filename}")
    try:
        raw = await report_file.read()
        report = json.loads(raw)
    except json.JSONDecodeError:
        raise HTTPException(status_code=400, detail="Invalid JSON in uploaded file")
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Error reading file: {str(e)}")
    result = analyze_report_data(report)
    logger.info(f"[ANALYZE_REPORT] Found {result['critical_issues']} critical issues")
    return result
| # --- Date Extraction --- | |
| async def extract_date(filename: str, x_api_key: str = Header(None)): | |
| """Extract date from a filename string.""" | |
| if not x_api_key or x_api_key != API_KEY: | |
| raise HTTPException(status_code=401, detail="Invalid or missing API Key") | |
| patterns = [ | |
| (r'(\d{4})-(\d{2})-(\d{2})', 'ISO'), | |
| (r'(\d{4})_(\d{2})_(\d{2})', 'underscore'), | |
| (r'(\d{1,2})-(\d{1,2})-(\d{2,4})', 'UK_dash'), | |
| (r'(\d{4})(\d{2})(\d{2})', 'compact'), | |
| ] | |
| for pattern, fmt in patterns: | |
| match = re.search(pattern, filename) | |
| if match: | |
| groups = match.groups() | |
| if fmt == 'UK_dash': | |
| day, month, year = groups | |
| if len(year) == 2: | |
| year = f"20{year}" | |
| return {"status": "found", "date": f"{year}-{int(month):02d}-{int(day):02d}", "format": fmt} | |
| else: | |
| year, month, day = groups | |
| return {"status": "found", "date": f"{year}-{int(month):02d}-{int(day):02d}", "format": fmt} | |
| return {"status": "not_found", "date": None} | |
| # ============================================================ | |
| # OPENAI-COMPATIBLE ENDPOINT (for Cursor IDE integration) | |
| # Enhanced: streaming, full message history, multi-model | |
| # ============================================================ | |
class ChatMessage(BaseModel):
    """One OpenAI-style chat message."""
    # "system", "user" or "assistant"; other roles are skipped when a flat
    # prompt is built (see _build_prompt_from_messages).
    role: str
    content: Union[str, List[Dict[str, Any]]]  # OpenAI: string or array of parts (vision/R1)
class ChatCompletionRequest(BaseModel):
    """OpenAI-compatible body for /v1/chat/completions."""
    # Logical model name, resolved to a backend via MODEL_ROUTING.
    model: str = "moltbot-legal"
    messages: List[ChatMessage]
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 2048
    stream: Optional[bool] = False  # SSE replay of the completed response when true
    # Accept but ignore these (Cursor sends them)
    top_p: Optional[float] = None
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    stop: Optional[List[str]] = None
    n: Optional[int] = None
# Model routing table — maps Cursor model names to backends
# Additive: new models can be added here without touching any other code
# Each route may carry "hf_model" (HF Inference API id) and/or "ollama_model"
# (local Ollama tag); _generate_for_model consumes both keys.
MODEL_ROUTING = {
    "moltbot-legal": {"backend": "hf", "hf_model": "Qwen/Qwen2.5-7B-Instruct"},
    "qwen2.5": {"backend": "hf", "hf_model": "Qwen/Qwen2.5-7B-Instruct"},
    "qwen2.5:1.5b": {"backend": "ollama", "ollama_model": "qwen2.5:1.5b"},
    "deepseek-chat": {"backend": "hf", "hf_model": "deepseek-ai/DeepSeek-V2.5"},
    "deepseek-coder": {"backend": "hf", "hf_model": "deepseek-ai/DeepSeek-Coder-V2-Instruct"},
}
| def _message_content_to_str(content: Union[str, List[Dict[str, Any]]]) -> str: | |
| """Normalize message content to string (OpenAI can send string or array of parts, e.g. vision/R1).""" | |
| if isinstance(content, str): | |
| return content | |
| if isinstance(content, list): | |
| return " ".join( | |
| p.get("text", "") for p in content | |
| if isinstance(p, dict) and isinstance(p.get("text"), str) | |
| ) | |
| return str(content) | |
| def _build_prompt_from_messages(messages: List[ChatMessage]) -> str: | |
| """Build a full prompt from the chat message history. | |
| Preserves conversation context, not just last message. | |
| """ | |
| parts = [] | |
| for msg in messages: | |
| text = _message_content_to_str(msg.content) | |
| if msg.role == "system": | |
| parts.append(f"System: {text}") | |
| elif msg.role == "user": | |
| parts.append(f"User: {text}") | |
| elif msg.role == "assistant": | |
| parts.append(f"Assistant: {text}") | |
| parts.append("Assistant:") | |
| return "\n\n".join(parts) | |
| def _build_hf_chat_messages(messages: List[ChatMessage]) -> List[Dict[str, str]]: | |
| """Build HF chat_completion message list from OpenAI messages.""" | |
| return [{"role": msg.role, "content": _message_content_to_str(msg.content)} for msg in messages] | |
def _generate_for_model(model_name: str, messages: List[ChatMessage], temperature: float = 0.7, max_tokens: int = 2048) -> Optional[str]:
    """Route generation to the right backend based on model name.
    Falls back through: model-specific backend → HF chat → HF text → Ollama.

    NOTE(review): HF chat_completion is always attempted first, even when the
    route's "backend" is "ollama" — the route only selects which model ids the
    HF/Ollama attempts use. Confirm this ordering is intended.
    """
    # Unknown model names fall back to the default "moltbot-legal" route.
    route = MODEL_ROUTING.get(model_name, MODEL_ROUTING.get("moltbot-legal"))
    # Try HF chat_completion first (best quality for instruct models)
    try:
        from huggingface_hub import InferenceClient
        hf_model = route.get("hf_model", HF_MODEL)
        token = HF_TOKEN if HF_TOKEN else None
        client = InferenceClient(token=token)
        chat_msgs = _build_hf_chat_messages(messages)
        response = client.chat_completion(
            model=hf_model,
            messages=chat_msgs,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        if response and response.choices:
            text = response.choices[0].message.content.strip()
            if text:
                logger.info(f"[CHAT] HF chat_completion success, model={hf_model}, len={len(text)}")
                return text
    except Exception as e:
        logger.warning(f"[CHAT] HF chat_completion failed: {e}")
    # Fallback: HF text_generation over a flattened prompt
    prompt = _build_prompt_from_messages(messages)
    text = generate_with_hf_api(prompt, model=route.get("hf_model"))
    if text:
        logger.info(f"[CHAT] HF text_generation fallback success, len={len(text)}")
        return text
    # Fallback: local Ollama
    ollama_model = route.get("ollama_model", "qwen2.5:1.5b")
    text = generate_with_ollama(ollama_model, prompt)
    if text:
        logger.info(f"[CHAT] Ollama fallback success, len={len(text)}")
        return text
    # All backends failed.
    return None
| def _stream_sse_response(model: str, response_text: str, request_id: str): | |
| """Yield Server-Sent Events chunks that Cursor expects for streaming.""" | |
| import time | |
| now = int(time.time()) | |
| # Split response into word-level chunks for realistic streaming | |
| words = response_text.split(" ") | |
| for i, word in enumerate(words): | |
| chunk_text = word + (" " if i < len(words) - 1 else "") | |
| chunk = { | |
| "id": request_id, | |
| "object": "chat.completion.chunk", | |
| "created": now, | |
| "model": model, | |
| "choices": [{ | |
| "index": 0, | |
| "delta": {"content": chunk_text}, | |
| "finish_reason": None | |
| }] | |
| } | |
| yield f"data: {json.dumps(chunk)}\n\n" | |
| # Final chunk with finish_reason | |
| final = { | |
| "id": request_id, | |
| "object": "chat.completion.chunk", | |
| "created": now, | |
| "model": model, | |
| "choices": [{ | |
| "index": 0, | |
| "delta": {}, | |
| "finish_reason": "stop" | |
| }] | |
| } | |
| yield f"data: {json.dumps(final)}\n\n" | |
| yield "data: [DONE]\n\n" | |
async def chat_completions(
    request: ChatCompletionRequest,
    authorization: str = Header(None),
    x_api_key: str = Header(None),
):
    """OpenAI-compatible chat completions endpoint.
    Supports both streaming and non-streaming modes.
    Accepts 'Authorization: Bearer <key>' or 'x-api-key: <key>'.
    Routes to appropriate backend based on model name.
    """
    import time
    # Auth: accept both header styles.
    # Accept MOLTBOT_API_KEY, HF_TOKEN, or any non-empty key when using default key.
    # This is a personal Space running a free model — HF provides its own rate limiting.
    api_key = None
    if authorization and authorization.startswith("Bearer "):
        api_key = authorization[7:].strip()  # drop the "Bearer " prefix
    if not api_key:
        api_key = x_api_key
    # If MOLTBOT_API_KEY was explicitly set (not default), enforce it strictly
    if API_KEY != "default_insecure_key":
        valid = api_key and (api_key == API_KEY or (HF_TOKEN and api_key == HF_TOKEN))
    else:
        # Default key in use: accept any non-empty key (so Cursor can send DeepSeek/HF/any key)
        valid = bool(api_key)
    if not valid:
        raise HTTPException(status_code=401, detail="Invalid or missing API Key")
    logger.info(f"[CHAT] model={request.model}, messages={len(request.messages)}, stream={request.stream}")
    # Inject legal exhibit instruction so every edit/amendment/insert uses Exhibit [Letter][Number] (DB-[N]) — [Filename]
    legal_system = ChatMessage(role="system", content=_get_legal_exhibit_instruction())
    messages_with_legal = [legal_system] + list(request.messages)
    # Generate response via model routing
    response_text = _generate_for_model(
        request.model, messages_with_legal,
        temperature=request.temperature or 0.7,
        max_tokens=request.max_tokens or 2048,
    )
    if not response_text:
        raise HTTPException(status_code=503, detail="All LLM backends unavailable")
    now = int(time.time())
    request_id = f"chatcmpl-moltbot-{now}"
    # STREAMING MODE — Cursor sends stream=true by default
    # NOTE: generation already completed above; "streaming" replays the full
    # response as word-level SSE chunks rather than streaming the backend.
    if request.stream:
        return StreamingResponse(
            _stream_sse_response(request.model, response_text, request_id),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "X-Accel-Buffering": "no",
            },
        )
    # NON-STREAMING MODE — standard OpenAI response.
    # "usage" counts whitespace-separated words, not real tokens — approximate.
    return {
        "id": request_id,
        "object": "chat.completion",
        "created": now,
        "model": request.model,
        "choices": [{
            "index": 0,
            "message": {"role": "assistant", "content": response_text},
            "finish_reason": "stop"
        }],
        "usage": {
            "prompt_tokens": len(" ".join(_message_content_to_str(m.content) for m in request.messages).split()),
            "completion_tokens": len(response_text.split()),
            "total_tokens": len(" ".join(_message_content_to_str(m.content) for m in request.messages).split()) + len(response_text.split()),
        }
    }
async def list_models():
    """OpenAI-compatible model listing. Cursor discovers models via this endpoint."""
    catalog = [
        ("moltbot-legal", "moltbot"),
        ("qwen2.5", "alibaba-cloud"),
        ("deepseek-chat", "deepseek"),
        ("deepseek-coder", "deepseek"),
    ]
    return {
        "object": "list",
        "data": [
            {"id": model_id, "object": "model", "created": 1700000000, "owned_by": owner}
            for model_id, owner in catalog
        ],
    }
if __name__ == "__main__":
    # Direct-run entrypoint; HF Spaces expects the app to listen on port 7860.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)