mathpulse-api-v3test / services /segmentation_service.py
github-actions[bot]
🚀 Auto-deploy backend from GitHub (9923591)
c1d887c
from backend.services.inference_client import call_hf_chat_async
import json
import logging
logger = logging.getLogger(__name__)
async def segment_educational_content(text: str):
"""
Use DeepSeek to segment raw educational text into pedagogical chunks.
Maintains Filipino/English (Taglish) nuances.
"""
prompt = f"""
You are an expert curriculum designer for the Filipino SHS STEM strand.
Segment the following educational text into a JSON list of pedagogical chunks.
Each chunk MUST belong to one of these types: 'Objective', 'LessonContent', 'PracticeProblem', 'Summary'.
Return ONLY a JSON array of objects:
[
{{"type": "Objective", "content": "..."}},
{{"type": "LessonContent", "content": "...", "title": "..."}},
...
]
Text:
{text}
"""
try:
# call_hf_chat_async is configured to route to DeepSeek solely per project rules
response = await call_hf_chat_async(prompt)
# Basic JSON extraction in case of LLM verbosity
if "```json" in response:
response = response.split("```json")[1].split("```")[0].strip()
elif "```" in response:
response = response.split("```")[1].split("```")[0].strip()
return json.loads(response)
except Exception as e:
logger.error(f"Semantic segmentation failed: {e}")
# Fallback: Treat as one large lesson chunk
return [{"type": "LessonContent", "content": text, "title": "Extracted Content"}]