import os import json import time import torch import gradio as gr from datetime import datetime, timedelta from transformers import AutoTokenizer, AutoModelForCausalLM # ---------------------------- # Config and defaults # ---------------------------- MODEL_OPTIONS = { "Phi-3.5 Mini Instruct (4B)": "microsoft/Phi-3.5-mini-instruct", "Phi-3.5 MoE Instruct (42B)": "microsoft/Phi-3.5-MoE-instruct", "Phi-3 Mini 4K Instruct (4B)": "microsoft/Phi-3-mini-4k-instruct", "Phi-3 Mini 128K Instruct (4B)": "microsoft/Phi-3-mini-128k-instruct" } EXAMPLES = [ # Comprehension "Listen to this short passage and tell me the main idea in your own words.", # Explanation "I’ll teach you something new: Solar panels turn sunlight into electricity. Can you explain that back to me simply?", # Vocabulary / Translation "Here’s a new phrase: 'The sea is calm today.' Try saying it in Basque, then repeat it in English.", # Style play "Let’s practice style: noir detective. Write one short sentence about Gros in that style.", # Literary reflection "Here’s a Shakespeare line: 'All the world’s a stage.' What do you think it means?", # Emotional reading "Read this Dickens passage and tell me how it feels — happy, sad, or something else?", # Poetry + translation "Translate this short poem line into another language, then tell me what mood it carries.", # Summarization + reflection "Summarize this text in two sentences, then say if it sounds optimistic or pessimistic.", # New: opinion practice "Read a short story and tell me what part you liked the most.", # New: correction loop "I’ll give you a sentence with a mistake: 'He go to school yesterday.' Can you fix it?" ] DEFAULT_PROFILE = { "name": "Learner", "style": ["concise", "reflective", "Basque context where relevant"], "goals": ["conversation-first learning", "daily language blocks", "CPU-only"] } DEFAULT_BLOCKS = [ {"type": "style", "rule": "Ask clarifying questions when uncertain."}, {"type": "vocab", "rule": "Use sensory detail + local place anchoring when writing creatively."}, {"type": "conversation", "rule": "Keep answers short and specific; avoid repeating conclusions."} ] BLOCKS_FILE = "blocks.json" # ---------------------------- # Persistence helpers # ---------------------------- def load_blocks(): if os.path.exists(BLOCKS_FILE): try: with open(BLOCKS_FILE, "r", encoding="utf-8") as f: return json.load(f) except Exception: pass return {"user_profile": DEFAULT_PROFILE, "language_blocks": DEFAULT_BLOCKS} def save_blocks(data): with open(BLOCKS_FILE, "w", encoding="utf-8") as f: json.dump(data, f, ensure_ascii=False, indent=2) def add_block(data, rule_text, block_type="conversation"): if not rule_text.strip(): return data entry = { "type": block_type, "rule": rule_text.strip(), "validated": True, "review_schedule": schedule_reviews() } data["language_blocks"].append(entry) save_blocks(data) return data def schedule_reviews(): today = datetime.utcnow().date() return [ str(today + timedelta(days=1)), str(today + timedelta(days=3)), str(today + timedelta(days=7)) ] # ---------------------------- # Model loading (CPU-only) # ---------------------------- _loaded = {} # cache def load_model(model_id): if model_id in _loaded: return _loaded[model_id] tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained( model_id, trust_remote_code=True, torch_dtype=torch.float32 # CPU friendly ) model.eval() _loaded[model_id] = (tokenizer, model) return tokenizer, model # ---------------------------- # Prompt construction # ---------------------------- def format_blocks(blocks): return "\n".join([f"- [{b.get('type','rule')}] {b.get('rule','')}" for b in blocks]) SYSTEM_TEMPLATE = """You are a conversation-first learning chatbot. Follow the user's style and goals, reinforce today's blocks, and confirm corrections. User style: {style} Goals: {goals} Active language blocks: {blocks} Guidelines: - Keep responses concise and specific. - Ask for clarification when needed. - Extract new patterns only when validated by the user. """ def build_messages(user_text, profile, blocks): system = SYSTEM_TEMPLATE.format( style=", ".join(profile.get("style", [])), goals=", ".join(profile.get("goals", [])), blocks=format_blocks(blocks) ) return [ {"role": "system", "content": system}, {"role": "user", "content": user_text} ] # ---------------------------- # Generate (with token/latency) # ---------------------------- def chat(user_text, model_label, blocks_json): # parse blocks from textarea (JSON or fallback lines) data = load_blocks() blocks = parse_blocks_editor(blocks_json, data.get("language_blocks", [])) model_id = MODEL_OPTIONS[model_label] tokenizer, model = load_model(model_id) messages = build_messages(user_text, data["user_profile"], blocks) inputs = tokenizer.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_tensors="pt" ).to("cpu") start = time.time() with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=200, do_sample=False, use_cache=False # Avoid DynamicCache mismatch issues on some setups ) latency = time.time() - start # slice out the generated continuation gen_text = tokenizer.decode( outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True ).strip() # token counts input_tokens = int(inputs["input_ids"].shape[-1]) output_tokens = int(outputs[0].shape[-1] - inputs["input_ids"].shape[-1]) metrics = f"Input tokens: {input_tokens} | Output tokens: {output_tokens} | Latency: {latency:.2f}s" return gen_text, metrics def parse_blocks_editor(text, fallback): """ Accept either: - JSON array of blocks - Plain text lines ("type: rule") """ if not text or not text.strip(): return fallback text = text.strip() try: parsed = json.loads(text) if isinstance(parsed, list): return parsed except Exception: pass # Fallback: each non-empty line becomes a block blocks = [] for line in text.splitlines(): line = line.strip() if not line: continue if ":" in line: t, r = line.split(":", 1) blocks.append({"type": t.strip(), "rule": r.strip()}) else: blocks.append({"type": "rule", "rule": line}) return blocks or fallback # ---------------------------- # Reflection: extract new rule # ---------------------------- REFLECT_TEMPLATE = """From the user's last message and your reply, extract ONE reusable conversation rule. Return only the rule, no preface, max 20 words. Example rules: - Ask clarifying questions when uncertain. - Use sensory detail with local anchors in creative writing. - Summarize then assess tone (optimistic/pessimistic). User said: {user} Assistant replied: {assistant} Now output one new rule:""" def reflect_and_save(user_text, assistant_text, blocks_editor_value): data = load_blocks() # Propose a rule via a simple heuristic (no extra model call, keeps it lean) # If you prefer model-based reflection, you can run a generation with REFLECT_TEMPLATE. proposal = heuristic_rule(user_text, assistant_text) data = add_block(data, proposal, block_type="conversation") # Return updated blocks as pretty JSON to show in the editor pretty = json.dumps(data["language_blocks"], ensure_ascii=False, indent=2) return pretty, f"Saved rule: {proposal}" def heuristic_rule(user_text, assistant_text): # Very simple heuristic: if assistant asked a question, reinforce clarification; # otherwise, reinforce concise responses. if "?" in assistant_text: return "Ask clarifying questions when uncertain." # If user asked for style or translation, capture that low = user_text.lower() if "translate" in low: return "Confirm translation intent and target tone before translating." if "style" in low or "noir" in low: return "Confirm style constraints before writing and keep it concise." return "Keep answers short, specific, and avoid repeating conclusions." # ---------------------------- # Gradio UI # ---------------------------- def launch(): data = load_blocks() default_blocks_text = json.dumps(data["language_blocks"], ensure_ascii=False, indent=2) with gr.Blocks(title="Conversation Learning Lab (CPU)") as demo: gr.Markdown("# 🗣️ Conversation Learning Lab (CPU-friendly)") gr.Markdown("Focus on daily dialogue. Reinforce validated language blocks. Transparent tokens and latency.") with gr.Row(): model_dd = gr.Dropdown( label="Choose a model", choices=list(MODEL_OPTIONS.keys()), value="Phi-3.5 Mini Instruct (4B)" ) with gr.Row(): user_in = gr.Textbox( label="Your message", placeholder="Start a conversation or choose an example below...", lines=3 ) with gr.Row(): blocks_editor = gr.Textbox( label="Today's blocks (JSON array or 'type: rule' lines)", value=default_blocks_text, lines=10 ) with gr.Row(): generate_btn = gr.Button("Generate (CPU)") reflect_btn = gr.Button("Reflect & Save Rule") with gr.Row(): output = gr.Textbox(label="Assistant", lines=8) with gr.Row(): metrics = gr.Markdown("") gr.Markdown("### 🧪 Try an example prompt:") gr.Examples( examples=EXAMPLES, inputs=user_in ) # Wire up events generate_btn.click( fn=chat, inputs=[user_in, model_dd, blocks_editor], outputs=[output, metrics] ) reflect_btn.click( fn=reflect_and_save, inputs=[user_in, output, blocks_editor], outputs=[blocks_editor, metrics] ) demo.launch() if __name__ == "__main__": launch()