# Opani Coder 1B — Gradio chat demo (Hugging Face Space app.py)
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread

# ------------------------------------------------------------------
# 1. Model setup
# ------------------------------------------------------------------
# Merged 16-bit fine-tune of Llama 3.2 1B for coding assistance in Twi.
MODEL_ID = "michsethowusu/opani-coder_1b-merged-16bit"

print("Loading tokenizer…")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

print("Loading model…")
# fp16 weights with automatic device placement (GPU when available).
# low_cpu_mem_usage streams weights in instead of building a full copy in RAM.
# trust_remote_code runs any custom modeling code shipped in the model repo —
# only safe because the repo is trusted.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True,
    trust_remote_code=True
)
print("Model ready!")
| # ------------------------------------------------------------------ | |
| # 2. Generation helper | |
| # ------------------------------------------------------------------ | |
def generate_response(message: str, history: list[dict], temperature, top_p, top_k, max_tokens):
    """Stream an assistant reply for *message* given the prior chat *history*.

    message: the newest user message.
    history: earlier turns as {"role": "user"|"assistant", "content": "…"} dicts.
    Yields the accumulated assistant text after every newly decoded chunk.
    """
    conversation = [*history, {"role": "user", "content": message}]
    prompt_text = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )
    encoded = tokenizer(prompt_text, return_tensors="pt").to(model.device)

    # Streams decoded text as tokens arrive; skip_prompt hides the prompt echo.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # model.generate blocks until completion, so run it on a worker thread
    # and consume the streamer incrementally on this one.
    worker = Thread(
        target=model.generate,
        kwargs={
            **encoded,
            "max_new_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
            "do_sample": True,
            "streamer": streamer,
        },
    )
    worker.start()

    reply = ""
    for piece in streamer:
        reply += piece
        yield reply
    worker.join()
| # ------------------------------------------------------------------ | |
| # 3. Gradio event helpers | |
| # ------------------------------------------------------------------ | |
def user_submit(user_message, history):
    """Clear the textbox and append the user's turn to the chat history.

    Returns ("", new_history); the incoming *history* list is not mutated.
    """
    updated = [*history, {"role": "user", "content": user_message}]
    return "", updated
def bot_respond(history, temperature, top_p, top_k, max_tokens):
    """Stream the assistant's reply into the chat history.

    *history* arrives with the newest user turn as its last element
    (appended by user_submit). Yields the growing history so Gradio
    can re-render the chatbot on every chunk.

    NOTE(fix): the previous version did ``history[-1] = {"role": "assistant", …}``
    without appending first, which overwrote the user's own turn — the
    user message vanished from the chat. We now append a placeholder
    assistant turn and update that in place.
    """
    user_turn = history[-1]["content"]
    history_before = history[:-1]

    # Placeholder assistant turn that the stream progressively fills in.
    history.append({"role": "assistant", "content": ""})
    for assistant_text in generate_response(
        user_turn, history_before, temperature, top_p, top_k, max_tokens
    ):
        history[-1] = {"role": "assistant", "content": assistant_text}
        yield history
| # ------------------------------------------------------------------ | |
| # 4. Gradio UI | |
| # ------------------------------------------------------------------ | |
# ------------------------------------------------------------------
# 4. Gradio UI
# ------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# 🇬🇭 Opani Coder 1B
A fine-tuned Llama 3.2 1B model (16-bit) for coding assistance in Twi.
Ask me anything about programming, and I'll help you out!
"""
    )

    # Chat transcript. type="messages" means the component's value is a list
    # of {"role": ..., "content": ...} dicts — the same format the
    # user_submit / bot_respond handlers produce.
    chatbot = gr.Chatbot(
        height=500,
        label="Chat History",
        type="messages",
        avatar_images=(None, "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png"),
    )

    with gr.Row():
        msg = gr.Textbox(
            label="Your Message",
            placeholder="Ask me a coding question…",
            scale=4,
            lines=2,
        )
        submit = gr.Button("Send 🚀", scale=1, variant="primary")

    # Sampling controls fed into generate_response; the third positional
    # argument of gr.Slider is the default value.
    with gr.Accordion("⚙️ Generation Parameters", open=False):
        gr.Markdown("*Adjust these settings to control the response style*")
        temperature = gr.Slider(0.1, 2.0, 0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, 0.9, step=0.05, label="Top P")
        top_k = gr.Slider(1, 100, 20, step=1, label="Top K")
        max_tokens = gr.Slider(64, 2048, 512, step=64, label="Max Tokens")

    clear = gr.Button("🗑️ Clear Chat")

    # ------------------------------------------------------------------
    # 5. Examples
    # ------------------------------------------------------------------
    # Twi example prompts; clicking one populates the textbox.
    gr.Examples(
        examples=[
            ["Meyɛ dɛn na mekyerɛw Python function?"],
            ["Kyerɛkyerɛ nea for loop yɛ"],
            ["Kyerɛw calculator program a ɛnyɛ den"],
            ["Nsonoe bɛn na ɛda list ne tuple ntam?"],
            ["Boa me ma mensiesie saa code yi mu mfomso"],
        ],
        inputs=msg,
        label="Example Questions"
    )

    # ------------------------------------------------------------------
    # 6. Event wiring
    # ------------------------------------------------------------------
    # Enter key and Send button do the same thing: append the user turn
    # immediately (queue=False keeps it snappy), then stream the bot reply
    # into the chatbot via the bot_respond generator.
    msg.submit(
        user_submit, [msg, chatbot], [msg, chatbot], queue=False
    ).then(
        bot_respond,
        [chatbot, temperature, top_p, top_k, max_tokens],
        chatbot,
    )
    submit.click(
        user_submit, [msg, chatbot], [msg, chatbot], queue=False
    ).then(
        bot_respond,
        [chatbot, temperature, top_p, top_k, max_tokens],
        chatbot,
    )
    # Returning None resets the chatbot component to empty.
    clear.click(lambda: None, None, chatbot, queue=False)

    gr.Markdown(
        """
---
### 💡 Tips for Best Results:
- **Factual/Technical questions**: temperature 0.3-0.5
- **Creative coding solutions**: temperature 0.7-1.0
- **Code generation**: temperature 0.5-0.7
### 📝 About This Model
Fine-tuned Llama 3.2 1B (16-bit full model) for coding assistance in Twi.
**Model**: [michsethowusu/opani-coder_1b-merged-16bit](https://huggingface.co/michsethowusu/opani-coder_1b-merged-16bit)
"""
    )
# ------------------------------------------------------------------
# 7. Launch
# ------------------------------------------------------------------
if __name__ == "__main__":
    # queue() is required for generator handlers (bot_respond) to stream
    # incremental updates to the UI.
    demo.queue().launch()