Spaces:

SolusOps
/

Study-with-ChampAI

Running

App Files Files Community

Study-with-ChampAI / agents /document_agent.py

SolusOps

feat: agents package

dc124db verified 11 days ago

Raw

History Blame Contribute Delete

2.24 kB

	from __future__ import annotations
	from dataclasses import dataclass, field
	from typing import List, Dict
	from services.model_router import ModelRouter
	from services.json_parser import extract_json
	from config.prompts import DOCUMENT_EXTRACT_SYSTEM, DOCUMENT_VISION_PROMPT

	@dataclass
	class DocumentConcepts:
	    """Structured concepts extracted from a document by ``DocumentAgent``.

	    Every list field defaults to a fresh empty list, so an empty or partial
	    result can be built without spelling out each field. Field order is
	    unchanged, so positional construction keeps working.
	    """

	    # default_factory gives each instance its own list; a shared mutable
	    # default would leak entries between instances.
	    topics: List[str] = field(default_factory=list)
	    # One mapping per definition; the key names are set by the extraction
	    # prompt and are not visible here -- TODO confirm against config.prompts.
	    definitions: List[Dict[str, str]] = field(default_factory=list)
	    facts: List[str] = field(default_factory=list)
	    formulae: List[str] = field(default_factory=list)
	    ocr_text: str = ""  # raw OCR output (if from image)

	class DocumentAgent:
	    """
	    Uses MiniCPM-V for all document understanding.
	    - Text input: MiniCPM in text mode → concept extraction
	    - Image input: MiniCPM in vision mode → OCR + concept extraction in one call
	    Nemotron is never called here.

	    Both paths send a prompt through ``router.understand`` and parse the
	    reply with ``extract_json`` into a ``DocumentConcepts``.
	    """

	    def __init__(self, router: ModelRouter):
	        """Keep the router whose ``understand`` method serves every request."""
	        self._router = router

	    def extract(self, raw_text: str) -> DocumentConcepts:
	        """Text path: MiniCPM extracts structured concepts from text.

	        Args:
	            raw_text: Document text appended to the extraction system prompt.

	        Returns:
	            The extracted concepts; ``ocr_text`` is left at its default.

	        Raises:
	            ValueError: If the reply cannot be parsed as a JSON object.
	        """
	        prompt = f"{DOCUMENT_EXTRACT_SYSTEM}\n\nExtract concepts from:\n\n{raw_text}"
	        reply = self._router.understand(prompt=prompt)
	        return self._concepts_from_reply(
	            reply, "DocumentAgent: could not parse JSON."
	        )

	    def extract_from_image(self, image_b64: str) -> DocumentConcepts:
	        """
	        Image path: MiniCPM does OCR + concept extraction in a single vision call.
	        Returns concepts AND the raw OCR text (stored in ocr_text for downstream use).

	        Args:
	            image_b64: Image payload forwarded unchanged to the router
	                (presumably base64-encoded, going by the name).

	        Raises:
	            ValueError: If the reply cannot be parsed as a JSON object.
	        """
	        reply = self._router.understand(
	            prompt=DOCUMENT_VISION_PROMPT, image_b64=image_b64
	        )
	        return self._concepts_from_reply(
	            reply,
	            "DocumentAgent: could not parse JSON from image.",
	            with_ocr=True,
	        )

	    @staticmethod
	    def _concepts_from_reply(reply, error_prefix: str, *, with_ocr: bool = False) -> DocumentConcepts:
	        """Parse a model reply and build ``DocumentConcepts`` from it.

	        Args:
	            reply: Whatever ``router.understand`` returned.
	            error_prefix: Text placed before the failure detail in errors.
	            with_ocr: When true, also copy the ``ocr_text`` key; the text
	                path ignores that key.

	        Raises:
	            ValueError: If parsing fails or the parsed value is not a dict.
	        """
	        try:
	            data = extract_json(reply)
	        except ValueError as exc:
	            raise ValueError(f"{error_prefix} {exc}") from exc

	        # A JSON array or scalar has no .get(); report it as the same
	        # ValueError callers already handle instead of an AttributeError.
	        if not isinstance(data, dict):
	            raise ValueError(
	                f"{error_prefix} expected a JSON object, got {type(data).__name__}"
	            )

	        def pick(key, default):
	            # dict.get's default only covers a missing key; an explicit JSON
	            # null would otherwise reach the dataclass as None.
	            value = data.get(key)
	            return default if value is None else value

	        return DocumentConcepts(
	            topics=pick("topics", []),
	            definitions=pick("definitions", []),
	            facts=pick("facts", []),
	            formulae=pick("formulae", []),
	            ocr_text=pick("ocr_text", "") if with_ocr else "",
	        )