Spaces:
Sleeping
Sleeping
| """ | |
| Feynman Explainer — Gradio Chat App | |
| Runs on Hugging Face Spaces (CPU free tier). | |
| """ | |
| import gradio as gr | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
# Identifier handed to `from_pretrained` for both the tokenizer and the model.
MODEL_ID = "shabul/qwen2.5-3b-feynman-explainer"

# System message placed at the start of every conversation sent to the model.
# It pins the explanation style: analogy-first, plain language, flowing prose.
SYSTEM_PROMPT = (
    "You are a Feynman-style explainer. For every question, build intuition "
    "from the ground up using concrete analogies and everyday language. "
    "No jargon until it's earned. No bullet points. Pure flowing prose. "
    "Be conversational and enthusiastic — like Feynman genuinely loved this topic."
)
# One-time start-up work: load the tokenizer and the weights for MODEL_ID.
# This runs at import time, before the UI is built.
print(f"Loading model: {MODEL_ID}")

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Weights are loaded as float32 with low_cpu_mem_usage enabled.
# nn.Module.eval() returns the module itself, so chaining it leaves `model`
# bound to the loaded network, switched to evaluation mode.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float32,
).eval()

print("Model loaded.")
def respond(message: str, history: list) -> str:
    """Generate one reply for the chat interface.

    Builds a transcript (system prompt, earlier turns, then the new user
    message), renders it through the tokenizer's chat template, and samples
    a continuation from the model.

    Args:
        message: The text the user just submitted.
        history: Earlier turns of the conversation. Each entry is either a
            dict or an object exposing ``role`` and ``content``; entries
            where either is missing or empty are skipped. ``None`` is
            treated as an empty history.

    Returns:
        The decoded reply, with prompt tokens and special tokens removed.
        If anything raises, the formatted traceback is printed and returned
        as the reply text instead of propagating the exception.
    """
    try:
        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
        for turn in history or []:
            if isinstance(turn, dict):
                role = turn.get("role")
                content = turn.get("content")
            else:
                role = getattr(turn, "role", None)
                content = getattr(turn, "content", None)
            if role and content:
                # Content may not be a plain string; coerce it for the template.
                messages.append({"role": role, "content": str(content)})
        messages.append({"role": "user", "content": message})

        encoded = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        )
        # generate() returns prompt + continuation; remember where the
        # prompt ends so only the newly generated tokens are decoded.
        prompt_len = encoded["input_ids"].shape[1]

        with torch.no_grad():
            output_ids = model.generate(
                **encoded,
                max_new_tokens=100,
                do_sample=True,
                temperature=0.75,
                repetition_penalty=1.1,
            )

        return tokenizer.decode(
            output_ids[0][prompt_len:],
            skip_special_tokens=True,
        )
    except Exception:
        # Top-level boundary for the UI callback: surface the failure in the
        # chat window (and the process log) rather than crashing the handler.
        import traceback

        err = traceback.format_exc()
        print(err)
        return f"⚠️ TRACEBACK:\n{err}"
# Chat UI: every submitted message is routed to `respond`.
# With type="messages", history entries carry `role` / `content`, which is
# the shape `respond` reads.
# NOTE(review): the description says a first token appears after ~30s, but
# `respond` returns the whole reply in one piece (no streaming) — confirm
# whether the wording or the handler should change.
demo = gr.ChatInterface(
    fn=respond,
    type="messages",
    title="🔬 Feynman Explainer",
    description=(
        "Ask anything. Feynman-style explanations — analogy first, no jargon until it's earned.\n\n"
        "⏱️ **CPU only** — responses take 2–4 minutes. First token appears after ~30s."
    ),
    examples=[
        "How does gradient descent actually work?",
        "What is entropy and why does it always increase?",
        "What is a p-value?",
        "Why does ice float on water?",
        "How does attention work in language models?",
    ],
    # Example prompts are shown but their outputs are not cached.
    cache_examples=False,
)

# Start the web server; runs at module level, as in the original script.
demo.launch()