File size: 4,637 Bytes
35b7945
2d18165
35b7945
e6a070e
034ce92
8a8c652
 
d98519f
8a8c652
 
 
 
 
 
e6a070e
 
 
3eda1ab
8a8c652
35b7945
034ce92
8a8c652
 
 
 
 
 
 
 
 
 
 
 
 
 
c9fec7f
 
e6a070e
388ecb4
 
 
07f129c
91f2c92
35b7945
388ecb4
2d18165
cd0448f
 
6282a9e
 
 
 
 
 
2d18165
cd0448f
 
2d18165
 
cd0448f
 
6282a9e
91f2c92
8a8c652
 
 
 
 
 
 
 
 
 
 
 
07f129c
8a8c652
c9fec7f
e6a070e
c9fec7f
 
 
 
07f129c
91f2c92
35b7945
c9fec7f
 
e6a070e
 
 
 
35b7945
ce4e9ec
 
 
35b7945
 
8a8c652
 
 
 
 
 
 
 
 
 
 
 
e6a070e
ce4e9ec
8a8c652
e6a070e
 
 
ce4e9ec
e6a070e
35b7945
 
 
ce4e9ec
f1fc208
07f129c
 
 
 
 
 
ce4e9ec
e6a070e
 
 
 
2d18165
e6a070e
 
 
388ecb4
034ce92
e6a070e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import os
import gradio as gr
from transformers import pipeline, AutoTokenizer
import torch
import spaces
import json
from huggingface_hub import HfApi, upload_file

# --- Constants ---
HF_TOKEN = os.environ.get("HF_TOKEN")  # Hugging Face token from the Space secrets; None if unset
DATASET_REPO = "Pisethan/khmer-lesson-dataset-generated"  # dataset repo that accumulates generated lessons
LOCAL_JSONL = "generated_lessons.jsonl"  # local append-only log, mirrored to DATASET_REPO on every save

# --- Options ---
# Dropdown choices for the UI; also enumerated exhaustively by generate_all_lessons().
grade_options = ["1", "2", "3", "4", "5", "6"]
topic_options = ["Addition", "Subtraction", "Counting", "Number Recognition", "Multiplication", "Division"]
level_options = ["Beginner", "Intermediate", "Advanced"]

# --- Tokenizer (global) ---
# Loaded once at import time and shared by both generation functions.
# NOTE(review): tokenizer comes from "khmer-lesson-model" while generation uses
# "khmer-lesson-model-v2" — confirm the two repos share the same vocabulary.
tokenizer = AutoTokenizer.from_pretrained("Pisethan/khmer-lesson-model", token=HF_TOKEN)

# --- Helper to save and upload ---
def save_to_jsonl(record):
    """Append *record* as one JSON line to the local log, then mirror the file to the dataset repo.

    The whole JSONL file is re-uploaded after each append so the remote copy
    always matches the local one.
    """
    line = json.dumps(record, ensure_ascii=False)
    with open(LOCAL_JSONL, "a", encoding="utf-8") as log:
        log.write(line + "\n")

    # Sync the updated log to the Hugging Face dataset repository.
    upload_file(
        path_or_fileobj=LOCAL_JSONL,
        path_in_repo="generated_lessons.jsonl",
        repo_id=DATASET_REPO,
        repo_type="dataset",
        token=HF_TOKEN,
    )

# --- Generation for one lesson ---
@spaces.GPU
def generate_lesson(grade, topic, level):
    """Generate one structured Khmer math lesson plan.

    Builds a text-generation pipeline (GPU if available), prompts the fine-tuned
    model, logs the (prompt, completion) pair to the dataset, and returns the raw
    generated text for display in the UI.

    Args:
        grade: Grade level as a string, e.g. "1".
        topic: Math topic, e.g. "Addition".
        level: TaRL level, e.g. "Beginner".

    Returns:
        The full generated text (prompt echo included, as the pipeline emits it).
    """
    device = 0 if torch.cuda.is_available() else -1
    pipe = pipeline(
        "text-generation",
        model="Pisethan/khmer-lesson-model-v2",
        tokenizer=tokenizer,
        device=device,
        token=HF_TOKEN
    )

    prompt = f"""
You are a lesson planning assistant. Return only one structured Khmer math lesson plan with these fields:

Lesson Title:
Objective:
Activity:
Instruction (Khmer):
Materials:

Please follow the structure exactly.

Grade: {grade}
Topic: {topic}
TaRL Level: {level}
"""

    output = pipe(prompt, max_new_tokens=300, temperature=0.7, do_sample=True, eos_token_id=tokenizer.eos_token_id)
    result = output[0]['generated_text']

    # BUG FIX: text-generation pipelines echo the prompt inside generated_text,
    # so the previous code stored the entire prompt duplicated inside
    # "completion". Strip the echoed prompt so the dataset field holds only
    # the model's actual output. (UI still shows the full text, unchanged.)
    completion = result[len(prompt):] if result.startswith(prompt) else result

    # Save the labeled example to the dataset for later fine-tuning/review.
    record = {
        "grade": grade,
        "topic": topic,
        "level": level,
        "prompt": prompt.strip(),
        "completion": completion.strip()
    }
    save_to_jsonl(record)
    return result

# --- Generation for all combinations ---
@spaces.GPU
def generate_all_lessons():
    """Generate a lesson plan for every (grade, topic, level) combination.

    Iterates the full cross product of the option lists (6 * 6 * 3 = 108
    lessons), logging each one to the dataset and returning all results as a
    single display string.

    Returns:
        All generated lessons concatenated with separators.
    """
    device = 0 if torch.cuda.is_available() else -1
    pipe = pipeline(
        "text-generation",
        model="Pisethan/khmer-lesson-model-v2",
        tokenizer=tokenizer,
        device=device,
        token=HF_TOKEN
    )

    # Collect chunks in a list and join once at the end — repeated `results +=`
    # over 108 iterations is quadratic in the total output size.
    chunks = []
    for grade in grade_options:
        for topic in topic_options:
            for level in level_options:
                prompt = f"""Generate a Khmer math lesson plan.

Grade: {grade}
Topic: {topic}
TaRL Level: {level}"""
                output = pipe(prompt, max_new_tokens=200, temperature=0.7, do_sample=True)
                result = output[0]['generated_text']

                # BUG FIX: strip the prompt echoed by the pipeline so the
                # stored "completion" holds only the model's output.
                completion = result[len(prompt):] if result.startswith(prompt) else result

                # NOTE: this uploads the JSONL file once per lesson (108
                # uploads per full run) via save_to_jsonl — acceptable for
                # durability, but batching would be cheaper.
                record = {
                    "grade": grade,
                    "topic": topic,
                    "level": level,
                    "prompt": prompt.strip(),
                    "completion": completion.strip()
                }
                save_to_jsonl(record)

                chunks.append(f"🔹 ថ្នាក់ {grade} | {topic} | {level}\n{result}\n\n{'-'*50}\n\n")
    return "".join(chunks)

# --- UI ---
with gr.Blocks() as demo:
    gr.Markdown("## πŸ€– αž’αŸ’αž“αž€αž‡αŸ†αž“αž½αž™αž”αž„αŸ’αž€αžΎαžαž˜αŸαžšαŸ€αž“αž‚αžŽαž·αžαžœαž·αž‘αŸ’αž™αžΆ")
    gr.Markdown("αž‡αŸ’αžšαžΎαžŸαžšαžΎαžŸαžαŸ’αž“αžΆαž€αŸ‹ αž”αŸ’αžšαž’αžΆαž“αž”αž‘ αž“αž·αž„αž€αž˜αŸ’αžšαž·αžαžŸαž·αžŸαŸ’αžŸ αžšαž½αž…αž…αž»αž…αž”αž„αŸ’αž€αžΎαžαž˜αŸαžšαŸ€αž“αŸ” αž¬αž…αž»αž…αž”αŸŠαžΌαžαž»αž„αžαžΆαž„αž€αŸ’αžšαŸ„αž˜αžŸαž˜αŸ’αžšαžΆαž”αŸ‹αž”αž„αŸ’αž€αžΎαžαž˜αŸαžšαŸ€αž“αž‘αžΆαŸ†αž„αž’αžŸαŸ‹αŸ”")

    with gr.Row():
        grade = gr.Dropdown(choices=grade_options, label="αžαŸ’αž“αžΆαž€αŸ‹ (Grade)", value="1")
        topic = gr.Dropdown(choices=topic_options, label="αž”αŸ’αžšαž’αžΆαž“αž”αž‘ (Topic)", value="Addition")
        level = gr.Dropdown(choices=level_options, label="αž€αž˜αŸ’αžšαž·αžαžŸαž·αžŸαŸ’αžŸ (TaRL Level)", value="Beginner")

    output_box = gr.Textbox(
        label="πŸ“˜ Khmer Lesson Plan",
        lines=20,
        max_lines=200,
        show_copy_button=True,
        autoscroll=True
    )

    with gr.Row():
        gen_btn = gr.Button("βœ… αž”αž„αŸ’αž€αžΎαžαž˜αŸαžšαŸ€αž“")
        gen_all_btn = gr.Button("🧠 αž”αž„αŸ’αž€αžΎαžαž˜αŸαžšαŸ€αž“αž‘αžΆαŸ†αž„αž’αžŸαŸ‹")
        clear_btn = gr.Button("🧹 αžŸαž˜αŸ’αž’αžΆαž")

    gen_btn.click(fn=generate_lesson, inputs=[grade, topic, level], outputs=output_box)
    gen_all_btn.click(fn=generate_all_lessons, outputs=output_box)
    clear_btn.click(fn=lambda: "", outputs=output_box)

demo.queue()
demo.launch()