"""MiniCPM4.1-8B LLM client — runs on Modal GPU.""" from typing import Iterator, List, Optional from models.modal_client import get_llm class MiniCPMLLM: def __init__(self): self._remote = get_llm() def stream_answer( self, query: str, context: str, chat_history: Optional[List[dict]] = None, ) -> Iterator[str]: for token in self._remote.stream_answer.remote_gen( query, context, chat_history or [] ): yield token def generate_summary(self, document_text: str, summary_type: str = "financial") -> str: return self._remote.generate_summary.remote(document_text, summary_type) def evaluate_confidence(self, query: str, context: str, answer: str) -> float: return self._remote.evaluate_confidence.remote(query, context, answer) def extract_entities(self, document_text: str) -> dict: return self._remote.extract_entities.remote(document_text)