File size: 1,540 Bytes
c1d887c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from backend.services.inference_client import call_hf_chat_async
import json
import logging

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

def _extract_json_array_text(raw: str) -> str:
    """Return the part of an LLM reply most likely to be the JSON array."""
    payload = raw.strip()

    # Prefer the contents of the first Markdown fence; "```json" is tried
    # before the bare fence so the language tag is not left in the payload.
    for opener in ("```json", "```"):
        _, found, rest = raw.partition(opener)
        if found:
            payload = rest.partition("```")[0].strip()
            break

    # Tolerate leading/trailing prose (or an unrecognised fence tag such as
    # "```JSON") by narrowing to the outermost [...] span when one exists.
    if not payload.startswith("["):
        start, end = payload.find("["), payload.rfind("]")
        if 0 <= start < end:
            payload = payload[start:end + 1]

    return payload


async def segment_educational_content(text: str) -> list:
    """
    Segment raw educational text into pedagogical chunks via the chat model.

    The prompt asks the model for a JSON array of objects typed as
    'Objective', 'LessonContent', 'PracticeProblem' or 'Summary', and asks it
    to work on Filipino SHS STEM material. The model call goes through
    ``call_hf_chat_async`` (routing to DeepSeek is a project-level setting
    not visible in this function).

    Args:
        text: Raw educational text to segment.

    Returns:
        The parsed list of chunk dicts when the model reply contains a JSON
        array of objects. On any failure (model call error, unparsable reply,
        or a reply that is not an array of objects) a single-element list
        wrapping the whole ``text`` as one 'LessonContent' chunk.

    Raises:
        Nothing from ``Exception``; failures are logged and the fallback
        chunk is returned instead.
    """
    prompt = f"""
    You are an expert curriculum designer for the Filipino SHS STEM strand.
    Segment the following educational text into a JSON list of pedagogical chunks.
    Each chunk MUST belong to one of these types: 'Objective', 'LessonContent', 'PracticeProblem', 'Summary'.
    
    Return ONLY a JSON array of objects:
    [
      {{"type": "Objective", "content": "..."}},
      {{"type": "LessonContent", "content": "...", "title": "..."}},
      ...
    ]
    
    Text:
    {text}
    """

    try:
        # call_hf_chat_async is configured to route to DeepSeek solely per project rules
        response = await call_hf_chat_async(prompt)

        chunks = json.loads(_extract_json_array_text(response))

        # json.loads happily returns dicts/strings/numbers; callers are
        # promised a list of chunk objects, so reject anything else and let
        # the fallback below take over.
        if not isinstance(chunks, list) or not all(
            isinstance(chunk, dict) for chunk in chunks
        ):
            raise ValueError("model reply is not a JSON array of objects")

        return chunks
    except Exception as e:
        # Deliberate best-effort boundary: segmentation must never break the
        # caller, so log with traceback and degrade to a single chunk.
        logger.exception("Semantic segmentation failed: %s", e)
        # Fallback: Treat as one large lesson chunk
        return [{"type": "LessonContent", "content": text, "title": "Extracted Content"}]