File size: 4,637 Bytes
35b7945 2d18165 35b7945 e6a070e 034ce92 8a8c652 d98519f 8a8c652 e6a070e 3eda1ab 8a8c652 35b7945 034ce92 8a8c652 c9fec7f e6a070e 388ecb4 07f129c 91f2c92 35b7945 388ecb4 2d18165 cd0448f 6282a9e 2d18165 cd0448f 2d18165 cd0448f 6282a9e 91f2c92 8a8c652 07f129c 8a8c652 c9fec7f e6a070e c9fec7f 07f129c 91f2c92 35b7945 c9fec7f e6a070e 35b7945 ce4e9ec 35b7945 8a8c652 e6a070e ce4e9ec 8a8c652 e6a070e ce4e9ec e6a070e 35b7945 ce4e9ec f1fc208 07f129c ce4e9ec e6a070e 2d18165 e6a070e 388ecb4 034ce92 e6a070e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
import os
import gradio as gr
from transformers import pipeline, AutoTokenizer
import torch
import spaces
import json
from huggingface_hub import HfApi, upload_file
# --- Constants ---
# Hugging Face access token, read from the environment (None when unset).
HF_TOKEN = os.getenv("HF_TOKEN")
# Dataset repository that receives generated lessons, and the local JSONL
# file that records are appended to before each upload.
DATASET_REPO = "Pisethan/khmer-lesson-dataset-generated"
LOCAL_JSONL = "generated_lessons.jsonl"

# --- Options ---
# Choices offered by the dropdowns in the UI; the bulk generator iterates
# over every combination of them.
grade_options = ["1", "2", "3", "4", "5", "6"]
topic_options = [
    "Addition",
    "Subtraction",
    "Counting",
    "Number Recognition",
    "Multiplication",
    "Division",
]
level_options = [
    "Beginner",
    "Intermediate",
    "Advanced",
]

# --- Tokenizer (global) ---
# Loaded once at import time and shared by both generation functions.
# NOTE(review): the tokenizer is loaded from "Pisethan/khmer-lesson-model"
# while the pipelines load "Pisethan/khmer-lesson-model-v2" - confirm that
# this mismatch is intentional.
tokenizer = AutoTokenizer.from_pretrained(
    "Pisethan/khmer-lesson-model",
    token=HF_TOKEN,
)
# --- Helper to save and upload ---
def save_to_jsonl(record):
    """Append *record* to the local JSONL file, then upload that file.

    The record is serialized as a single JSON line (non-ASCII characters are
    written as-is, not escaped) and appended to ``LOCAL_JSONL``. The file is
    then uploaded to the ``DATASET_REPO`` dataset repository under the path
    ``generated_lessons.jsonl``.

    Args:
        record: JSON-serializable object to store.
    """
    serialized = json.dumps(record, ensure_ascii=False)
    with open(LOCAL_JSONL, mode="a", encoding="utf-8") as sink:
        sink.write(f"{serialized}\n")

    # The file is closed before uploading so the new line is on disk.
    # NOTE(review): the entire local file is uploaded on every call;
    # presumably this replaces the copy in the repo - confirm that a fresh
    # (empty) local file cannot overwrite previously uploaded records.
    upload_file(
        repo_id=DATASET_REPO,
        repo_type="dataset",
        path_in_repo="generated_lessons.jsonl",
        path_or_fileobj=LOCAL_JSONL,
        token=HF_TOKEN,
    )
# --- Generation for one lesson ---
@spaces.GPU
def generate_lesson(grade, topic, level):
    """Generate one lesson plan, store it, and return the generated text.

    A text-generation pipeline is built on each call, prompted with the
    requested grade, topic and TaRL level, and its first output is saved
    through ``save_to_jsonl`` before being returned.

    Args:
        grade: Grade value interpolated into the prompt.
        topic: Topic value interpolated into the prompt.
        level: TaRL level value interpolated into the prompt.

    Returns:
        The ``generated_text`` of the pipeline's first output.
    """
    # Device index 0 when CUDA is available, otherwise -1.
    if torch.cuda.is_available():
        target_device = 0
    else:
        target_device = -1

    generator = pipeline(
        "text-generation",
        model="Pisethan/khmer-lesson-model-v2",
        tokenizer=tokenizer,
        device=target_device,
        token=HF_TOKEN,
    )

    prompt = f"""
You are a lesson planning assistant. Return only one structured Khmer math lesson plan with these fields:
Lesson Title:
Objective:
Activity:
Instruction (Khmer):
Materials:
Please follow the structure exactly.
Grade: {grade}
Topic: {topic}
TaRL Level: {level}
"""

    generations = generator(
        prompt,
        max_new_tokens=300,
        temperature=0.7,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
    )
    result = generations[0]["generated_text"]

    # Save to dataset
    # NOTE(review): text-generation pipelines presumably echo the prompt in
    # "generated_text" by default - confirm whether "completion" should
    # contain the prompt as well.
    save_to_jsonl(
        {
            "grade": grade,
            "topic": topic,
            "level": level,
            "prompt": prompt.strip(),
            "completion": result.strip(),
        }
    )
    return result
# --- Generation for all combinations ---
@spaces.GPU
def generate_all_lessons():
    """Generate a lesson for every grade/topic/level combination.

    One text-generation pipeline is built and reused for all combinations of
    ``grade_options``, ``topic_options`` and ``level_options`` (grade varies
    slowest, level fastest). Each generated lesson is stored through
    ``save_to_jsonl`` and added to the report.

    Returns:
        A single string containing one headed section per combination, each
        followed by a 50-dash separator line.
    """
    # Function-scope import so this block does not depend on a file-level
    # import being added.
    from itertools import product

    device = 0 if torch.cuda.is_available() else -1
    pipe = pipeline(
        "text-generation",
        model="Pisethan/khmer-lesson-model-v2",
        tokenizer=tokenizer,
        device=device,
        token=HF_TOKEN,
    )

    # Collect sections in a list and join once at the end instead of growing
    # one string with repeated "+=".
    sections = []
    # product() yields tuples in the same order as the equivalent nested
    # loops: the rightmost iterable advances fastest.
    for grade, topic, level in product(grade_options, topic_options, level_options):
        prompt = f"""Generate a Khmer math lesson plan.
Grade: {grade}
Topic: {topic}
TaRL Level: {level}"""
        output = pipe(prompt, max_new_tokens=200, temperature=0.7, do_sample=True)
        result = output[0]["generated_text"]

        # NOTE(review): save_to_jsonl uploads the dataset file on every call,
        # so this loop triggers one upload per combination - confirm that is
        # acceptable for the target repository.
        save_to_jsonl(
            {
                "grade": grade,
                "topic": topic,
                "level": level,
                "prompt": prompt.strip(),
                "completion": result.strip(),
            }
        )
        sections.append(f"πΉ ααααΆαα {grade} | {topic} | {level}\n{result}\n\n{'-'*50}\n\n")

    return "".join(sections)
# --- UI ---
# Gradio Blocks layout: two Markdown headers, a row of three dropdowns
# (grade, topic, level), a text box for the generated lesson, and a row of
# three buttons (generate one, generate all, clear).
# NOTE(review): the non-ASCII UI strings below appear mis-encoded in this
# copy of the file and some are split across several lines - restore them
# from the original UTF-8 source before running.
with gr.Blocks() as demo:
gr.Markdown("## π€ α’ααααααα½ααααααΎααααααααα·ααα·ααααΆ")
gr.Markdown("ααααΎαααΎαααααΆαα αααααΆααα αα·αααααα·ααα·ααα αα½α
α
α»α
αααααΎααααααα α¬α
α»α
αααΌαα»αααΆαααααααααααΆαααααααΎααααααααΆααα’ααα")
# Input selectors; their choices come from the module-level option lists.
with gr.Row():
grade = gr.Dropdown(choices=grade_options, label="ααααΆαα (Grade)", value="1")
topic = gr.Dropdown(choices=topic_options, label="αααααΆααα (Topic)", value="Addition")
level = gr.Dropdown(choices=level_options, label="ααααα·ααα·ααα (TaRL Level)", value="Beginner")
# Output area that receives the text returned by the generation functions.
output_box = gr.Textbox(
label="π Khmer Lesson Plan",
lines=20,
max_lines=200,
show_copy_button=True,
autoscroll=True
)
# Action buttons.
with gr.Row():
gen_btn = gr.Button("β
αααααΎαααααα")
gen_all_btn = gr.Button("π§ αααααΎααααααααΆααα’αα")
clear_btn = gr.Button("π§Ή αααα’αΆα")
# Event wiring: single generation uses the three dropdown values, bulk
# generation takes no inputs, and clear writes an empty string to the box.
gen_btn.click(fn=generate_lesson, inputs=[grade, topic, level], outputs=output_box)
gen_all_btn.click(fn=generate_all_lessons, outputs=output_box)
clear_btn.click(fn=lambda: "", outputs=output_box)
# Turn on queuing, then start the app.
demo.queue()
demo.launch()
|