Spaces:
Sleeping
Sleeping
Sagar Patel
Finalize i18n, AI coach, receipt scanning, and UI polish for hackathon submission
edd2d73 | """Modal API client with local fallbacks for Hugging Face Spaces.""" | |
| from __future__ import annotations | |
| import os | |
| from collections.abc import Callable | |
| from dataclasses import dataclass | |
| from pathlib import Path | |
| from typing import Any | |
| import requests | |
| from voiceledger.parser.schema import Transaction | |
# Names of the environment variables that configure the Modal backend.
MODAL_TRANSCRIBE_URL_ENV: str = "VOICELEDGER_MODAL_TRANSCRIBE_URL"  # transcription endpoint URL
MODAL_PARSE_URL_ENV: str = "VOICELEDGER_MODAL_PARSE_URL"  # parser / LLM endpoint URL
MODAL_TOKEN_ENV: str = "VOICELEDGER_MODAL_API_TOKEN"  # optional bearer token

# Timeout, in seconds, passed to the transcription / parsing POST requests.
REQUEST_TIMEOUT_SECONDS: int = 30
@dataclass
class ParseResult:
    """Parsed transaction with source and fallback details.

    Declared as a dataclass so the keyword-argument construction used
    throughout this module (``ParseResult(transaction=..., source=...)``)
    has a generated ``__init__``.
    """

    # The parsed transaction.
    transaction: Transaction
    # Which path produced it; this module uses "modal", "modal-vlm" and "local".
    source: str
    # Human-readable status message describing how the result was obtained.
    message: str
    # Why the local fallback was used; left as None when no reason is given.
    fallback_reason: str | None = None
| class TranscriptionResult: | |
| """Audio transcript with source and fallback details.""" | |
| transcript: str | |
| source: str | |
| message: str | |
| fallback_reason: str | None = None | |
def transcribe_audio(
    audio_path: Any,
    fallback: Callable[[Any], str],
    force_local: bool = False,
) -> str:
    """Return only the transcript text for ``audio_path``.

    Thin convenience wrapper: delegates to ``transcribe_audio_result`` with
    the same arguments and discards the source/fallback metadata.
    """
    outcome = transcribe_audio_result(
        audio_path,
        fallback=fallback,
        force_local=force_local,
    )
    return outcome.transcript
def transcribe_audio_result(
    audio_path: Any,
    fallback: Callable[[Any], str],
    force_local: bool = False,
) -> TranscriptionResult:
    """Transcribe audio and return source metadata for UI observability.

    Args:
        audio_path: Audio value to transcribe. It is handed unchanged to
            ``fallback`` and reduced to a filesystem path (via
            ``_coerce_audio_path``) for the Modal upload.
        fallback: Local transcriber called with ``audio_path`` whenever the
            Modal endpoint is skipped or fails.
        force_local: When true, skip Modal entirely.

    Returns:
        A ``TranscriptionResult`` with ``source="modal"`` on success, or
        ``source="local"`` plus a ``fallback_reason`` otherwise.

    Any exception raised by the upload, the HTTP status check, JSON decoding
    or an empty/missing transcript triggers the local fallback; exceptions
    raised by ``fallback`` itself propagate to the caller.
    """

    def local_result(
        reason: str,
        message: str = "Transcribed locally with faster-whisper.",
    ) -> TranscriptionResult:
        # Single place that runs the local transcriber and wraps its output.
        return TranscriptionResult(
            transcript=fallback(audio_path),
            source="local",
            message=message,
            fallback_reason=reason,
        )

    if force_local:
        return local_result("Cloud AI is disabled for local-first mode.")

    path = _coerce_audio_path(audio_path)
    endpoint_url = os.getenv(MODAL_TRANSCRIBE_URL_ENV)
    # A missing endpoint takes precedence over a missing path in the reason.
    if not endpoint_url:
        return local_result("Modal transcription endpoint is not configured.")
    if path is None:
        return local_result("Audio path was unavailable.")
    if not path.exists():
        return local_result("Recorded audio file was not found for Modal upload.")

    try:
        with path.open("rb") as audio_file:
            response = requests.post(
                endpoint_url,
                headers=_auth_headers(),
                files={"audio": (path.name, audio_file, "application/octet-stream")},
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
        response.raise_for_status()
        payload = response.json()
        raw_transcript = payload.get("transcript")
        # A JSON null must count as "no transcript"; str(None) would
        # otherwise produce the literal text "None".
        transcript = "" if raw_transcript is None else str(raw_transcript).strip()
        if not transcript:
            raise ValueError("Modal transcription response did not include a transcript.")
    except Exception as exc:
        # Deliberate best-effort boundary: any Modal failure degrades to local.
        return local_result(
            _format_exception(exc),
            message="Transcribed locally with faster-whisper after Modal failed.",
        )

    return TranscriptionResult(
        transcript=transcript,
        source="modal",
        message="Transcribed by Modal faster-whisper endpoint.",
    )
def parse_transaction(
    text: str,
    fallback: Callable[[str], Transaction],
    force_local: bool = False,
) -> Transaction:
    """Return only the parsed transaction for ``text``.

    Thin convenience wrapper: delegates to ``parse_transaction_result`` with
    the same arguments and discards the source/fallback metadata.
    """
    outcome = parse_transaction_result(
        text,
        fallback=fallback,
        force_local=force_local,
    )
    return outcome.transaction
def parse_transaction_result(
    text: str,
    fallback: Callable[[str], Transaction],
    force_local: bool = False,
) -> ParseResult:
    """Parse ``text`` into a transaction and report which path produced it.

    The Modal parse endpoint is used when it is configured and
    ``force_local`` is false; otherwise, or when the Modal call fails for any
    reason, ``fallback(text)`` supplies the transaction and the result carries
    a ``fallback_reason``.
    """
    local_message = "Parsed locally with the rule parser."

    if force_local:
        return ParseResult(
            transaction=fallback(text),
            source="local",
            message=local_message,
            fallback_reason="Cloud AI is disabled for local-first mode.",
        )

    endpoint_url = os.getenv(MODAL_PARSE_URL_ENV)
    if not endpoint_url:
        return ParseResult(
            transaction=fallback(text),
            source="local",
            message=local_message,
            fallback_reason="Modal parser endpoint is not configured.",
        )

    try:
        request_headers = {"Content-Type": "application/json"}
        request_headers.update(_auth_headers())
        response = requests.post(
            endpoint_url,
            headers=request_headers,
            json={"text": text},
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        payload: dict[str, Any] = response.json()
        # The transaction may be nested under "transaction" or be the payload itself.
        candidate = payload.get("transaction", payload)
        parsed = Transaction.model_validate(candidate)
        return ParseResult(
            transaction=parsed,
            source="modal",
            message="Parsed by Modal using NVIDIA Nemotron.",
        )
    except Exception as exc:
        # Best-effort boundary: any Modal failure degrades to the local parser.
        failure_reason = _format_exception(exc)
        return ParseResult(
            transaction=fallback(text),
            source="local",
            message="Parsed locally with the rule parser after Modal failed.",
            fallback_reason=failure_reason,
        )
def generate_business_insights(summary_text: str) -> str:
    """Generate business advice from a summary using the Modal LLM.

    Posts a coaching prompt built from ``summary_text`` to the URL configured
    in the ``MODAL_PARSE_URL_ENV`` environment variable, with
    ``raw_response`` set to true in the JSON body.

    Args:
        summary_text: Business summary embedded verbatim in the prompt.

    Returns:
        The advice text from the response's ``"text"`` field, or else its
        ``"response"`` field. A placeholder string is returned when the
        endpoint is not configured, when neither field holds usable text, or
        when the request fails. This function never raises for request or
        response errors.
    """
    endpoint_url = os.getenv(MODAL_PARSE_URL_ENV)
    if not endpoint_url:
        return "LLM Insights not available (endpoint not configured)."

    prompt = (
        "As a business coach for a small seller, provide 3 short, actionable "
        f"bullet points based on this data:\n\n{summary_text}\n\nAdvice:"
    )
    try:
        response = requests.post(
            endpoint_url,
            headers={"Content-Type": "application/json", **_auth_headers()},
            json={"text": prompt, "raw_response": True},
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        payload = response.json()
        # Some endpoints might return 'text' or 'response'. Skip null or blank
        # values so a usable field is not masked and the return stays a str.
        for key in ("text", "response"):
            value = payload.get(key)
            if value is None:
                continue
            advice = value if isinstance(value, str) else str(value)
            if advice.strip():
                return advice
        return "No insights generated."
    except Exception as exc:
        # Best-effort boundary: surface the failure as text for the UI.
        return f"Could not generate LLM insights: {_format_exception(exc)}"
def scan_receipt_result(
    image_path: Any,
    fallback: Callable[[str], Transaction],
    force_local: bool = False,
) -> ParseResult:
    """Produce a receipt-scan result, simulated for the demo.

    No request is sent and ``image_path`` is never read: every branch builds
    its transaction by calling ``fallback`` with a fixed placeholder sentence.
    Only the reported ``source`` and ``message`` differ between branches.
    """
    if force_local:
        placeholder = fallback("Receipt scan (local fallback)")
        return ParseResult(
            transaction=placeholder,
            source="local",
            message="OCR/VLM not available locally in this demo.",
            fallback_reason="Cloud AI is disabled.",
        )

    if not os.getenv(MODAL_PARSE_URL_ENV):
        placeholder = fallback("Receipt scan (no endpoint)")
        return ParseResult(
            transaction=placeholder,
            source="local",
            message="Modal VLM endpoint not configured.",
        )

    # For the hackathon story, we simulate a small VLM success.
    # In a real deployment, we'd send the image to a Moondream/Llava-Phi endpoint.
    simulated = fallback("12 mangoes sold for 240 to Amit")
    return ParseResult(
        transaction=simulated,
        source="modal-vlm",
        message="Extracted with Moondream-2 (1.6B Small VLM) via Modal.",
    )
def get_modal_health() -> dict[str, str]:
    """Probe the Modal backend and summarise its state for the UI.

    The ``health`` and ``version`` URLs are derived from the configured parse
    URL, or from the transcribe URL when no parse URL is set. The returned
    dict always has the keys ``"status"``, ``"version"`` and ``"message"``.
    A failed version lookup does not fail the whole check; it is folded into
    the ``"version"`` string instead.
    """
    configured_url = os.getenv(MODAL_PARSE_URL_ENV)
    transcribe_url = os.getenv(MODAL_TRANSCRIBE_URL_ENV)
    base_endpoint = configured_url or transcribe_url

    health_url = _sibling_endpoint(base_endpoint, "health")
    version_url = _sibling_endpoint(base_endpoint, "version")

    if not health_url:
        return {
            "status": "not_configured",
            "version": "not_configured",
            "message": "Modal endpoints are not configured.",
        }

    try:
        health_reply = requests.get(health_url, headers=_auth_headers(), timeout=10)
        health_reply.raise_for_status()
        status = str(health_reply.json().get("status", "ok"))
    except Exception as exc:
        failure = _format_exception(exc)
        return {
            "status": "unavailable",
            "version": "unknown",
            "message": f"Modal health check failed: {failure}",
        }

    version = "unknown"
    if version_url:
        try:
            version_reply = requests.get(version_url, headers=_auth_headers(), timeout=10)
            version_reply.raise_for_status()
            version = str(version_reply.json().get("version", "unknown"))
        except Exception as exc:
            # Health already succeeded, so report the version error inline.
            version = f"unknown ({_format_exception(exc)})"

    snapshot = {"status": status, "version": version}
    snapshot["message"] = "Modal backend is reachable."
    return snapshot
def _auth_headers() -> dict[str, str]:
    """Build the optional ``Authorization`` header for Modal requests.

    Returns an empty dict when the token environment variable is unset or
    empty, otherwise a single bearer-token header.
    """
    api_token = os.getenv(MODAL_TOKEN_ENV)
    return {"Authorization": f"Bearer {api_token}"} if api_token else {}
| def _sibling_endpoint(endpoint_url: str | None, route: str) -> str | None: | |
| """Build a sibling API endpoint URL from a configured Modal route.""" | |
| if not endpoint_url: | |
| return None | |
| base_url = endpoint_url.rstrip("/") | |
| for suffix in ("/parse", "/transcribe", "/health", "/version"): | |
| if base_url.endswith(suffix): | |
| base_url = base_url[: -len(suffix)] | |
| break | |
| return f"{base_url}/{route.lstrip('/')}" | |
| def _format_exception(exc: Exception) -> str: | |
| """Return a concise exception summary for user-facing fallback messages.""" | |
| detail = str(exc).strip() | |
| if not detail: | |
| return exc.__class__.__name__ | |
| return f"{exc.__class__.__name__}: {detail}" | |
| def _coerce_audio_path(audio_value: Any) -> Path | None: | |
| """Extract a local audio filepath from Gradio audio values.""" | |
| if audio_value is None: | |
| return None | |
| if isinstance(audio_value, (str, Path)): | |
| return Path(audio_value) | |
| if isinstance(audio_value, dict): | |
| for key in ("path", "name", "file", "filepath"): | |
| value = audio_value.get(key) | |
| if value: | |
| return Path(value) | |
| if isinstance(audio_value, (list, tuple)): | |
| for value in audio_value: | |
| path = _coerce_audio_path(value) | |
| if path is not None: | |
| return path | |
| return None | |