"""Gradio chat demo for BrainboxAI/code-il-E4B (CPU-only streaming inference)."""

from threading import Thread

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

MODEL_ID = "BrainboxAI/code-il-E4B-safetensors"

# CPU-only deployment: full-precision float32, low_cpu_mem_usage to stream
# weights from disk instead of materializing them twice in RAM.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,
    device_map="cpu",
    low_cpu_mem_usage=True,
)
model.eval()

EXAMPLES = [
    ["Implement binary search in TypeScript with full edge case handling and JSDoc comments."],
    ["Build a FastAPI endpoint that accepts a file upload, validates it's a PDF under 10MB, and returns its text content."],
    ["Write an n8n Code node (JavaScript) that takes input items, deduplicates by 'email' field, and returns the unique ones."],
    ["הסבר את הקוד הבא ותציע שיפורים:\n\nfunction calc(arr) {\n let s = 0;\n for (let i = 0; i < arr.length; i++) s += arr[i];\n return s / arr.length;\n}"],
]


def generate(message, history, temperature, max_tokens):
    """Stream a model reply for a Gradio ChatInterface request.

    Args:
        message: The new user message (str).
        history: Prior turns; either ``{"role", "content"}`` dicts
            (``type="messages"``) or legacy ``(user, assistant)`` tuples.
        temperature: Sampling temperature; ``0`` selects greedy decoding.
        max_tokens: Maximum number of new tokens to generate.

    Yields:
        The accumulated assistant reply, growing token by token.
    """
    # Normalize history into the chat-template message format.
    messages = []
    for msg in history:
        if isinstance(msg, dict):
            messages.append({"role": msg["role"], "content": msg["content"]})
        else:
            # Legacy tuple-style history: (user, assistant-or-None).
            user_msg, assistant_msg = msg
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    inputs = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        add_generation_prompt=True,
    )
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    gen_kwargs = {
        "input_ids": inputs,
        "max_new_tokens": max_tokens,
        "streamer": streamer,
        "pad_token_id": tokenizer.eos_token_id,
    }
    # Only pass sampling parameters when actually sampling: handing
    # temperature=0.0 to generate() triggers transformers validation warnings.
    if temperature > 0:
        gen_kwargs.update(do_sample=True, temperature=temperature, top_p=0.95)
    else:
        gen_kwargs["do_sample"] = False

    # Run generation in a background thread so we can yield partial output
    # from the streamer as tokens arrive.
    thread = Thread(target=model.generate, kwargs=gen_kwargs)
    thread.start()
    output = ""
    for token in streamer:
        output += token
        yield output
    # Streamer is exhausted once generation finishes; reap the worker thread.
    thread.join()


with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="Code-IL E4B") as demo:
    gr.Markdown(
        """
# code-il-E4B
### מודל קוד ישראלי - 4 מיליארד פרמטרים, רץ על מחשב נייד

מודל מבוסס Gemma 4 E4B שאומן על OpenCodeInstruct של NVIDIA + dataset קוד עברי-אנגלי משלי.
מתמחה ב-Python, TypeScript, n8n, ותומך בעברית.

> ⚠️ **דמו זה רץ על CPU - יקח 10-30 שניות לתשובה.**
> להרצה מהירה במחשב שלך: ראה הוראות בתחתית.

**By [BrainboxAI](https://huggingface.co/BrainboxAI)** - Powered by Unsloth
"""
    )

    chat = gr.ChatInterface(
        fn=generate,
        type="messages",
        examples=EXAMPLES,
        cache_examples=False,
        additional_inputs=[
            gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="Temperature"),
            gr.Slider(64, 512, value=256, step=64, label="Max Tokens (נמוך = מהיר יותר)"),
        ],
        additional_inputs_accordion=gr.Accordion("⚙️ הגדרות מתקדמות", open=False),
    )

    gr.Markdown(
        """
---
### הרצה מקומית (מהירה)
```python
from transformers import AutoTokenizer, AutoModelForCausalLM
tokenizer = AutoTokenizer.from_pretrained("BrainboxAI/code-il-E4B-safetensors")
model = AutoModelForCausalLM.from_pretrained("BrainboxAI/code-il-E4B-safetensors", torch_dtype="auto", device_map="auto")
```
**Training**: NVIDIA OpenCodeInstruct (4.97M) + BrainboxAI/code-training-il (40k) + bleugreen/typescript-instruct (41k)
**Format**: Safetensors 16-bit | **License**: Apache 2.0 | **Languages**: English + עברית
"""
    )

if __name__ == "__main__":
    # Bound the request queue so a CPU-only box isn't buried under backlog.
    demo.queue(max_size=10).launch()