"""Gradio chat UI for the ``cyirr/finetunecoder`` Python coding assistant.

Importing this module loads the tokenizer and the model weights (printing
progress to stdout) and builds the ``demo`` chat interface. The web server
is started only when the file is executed as a script.
"""

import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Qwen2ForCausalLM,
)

model_id = "cyirr/finetunecoder"

print("Loading tokenizer...")
# NOTE(review): the tokenizer is loaded from a different repository than the
# model weights -- confirm the two vocabularies actually match.
tokenizer = AutoTokenizer.from_pretrained("unsloth/deepseek-r1-distill-qwen-7b")

print("Loading model...")
model = Qwen2ForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)
model.eval()
print("Model ready!")

# System prompt placed at the start of every conversation.
SYS = (
    "You are an expert Python developer. You write clean, efficient, "
    "well-commented Python code. You follow PEP8, use Pythonic patterns, "
    "and always explain your reasoning."
)

# Sampling settings passed to ``model.generate``.
MAX_NEW_TOKENS = 256
TEMPERATURE = 0.7

# Only these history roles are replayed into the prompt.
_REPLAYED_ROLES = ("user", "assistant")


def _content_to_text(content):
    """Flatten one message's content to a plain string.

    Strings are returned unchanged. Dicts contribute their ``"text"`` entry
    (empty if absent), lists/tuples are flattened recursively and joined,
    ``None`` becomes ``""`` and anything else goes through ``str()``.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        # NOTE(review): presumably structured content parts look like
        # {"type": "text", "text": ...} -- verify against the installed
        # Gradio version.
        return _content_to_text(content.get("text"))
    if isinstance(content, (list, tuple)):
        return "".join(_content_to_text(part) for part in content)
    return str(content)


def _iter_turns(history):
    """Yield ``(role, text)`` for every past turn in *history*.

    Accepts both pair-style history (``[user, assistant]`` per exchange) and
    message-style history (``{"role": ..., "content": ...}`` per message).
    Entries whose content is ``None`` are skipped instead of raising.
    """
    for turn in history or ():
        if isinstance(turn, dict):
            pairs = [(turn.get("role"), turn.get("content"))]
        else:
            user, assistant = turn
            pairs = [("user", user), ("assistant", assistant)]
        for role, content in pairs:
            if content is not None:
                yield role, _content_to_text(content)


def _format_turn(role, text):
    """Wrap one turn in the ``<|im_start|>`` / ``<|im_end|>`` markers."""
    return "<|im_start|>" + role + "\n" + text + "\n<|im_end|>\n"


def _build_prompt(message, history):
    """Assemble the full prompt: system turn, past turns, new user message.

    The prompt ends with an opened assistant turn so generation continues
    as the assistant's reply.
    """
    # NOTE(review): the markers are ChatML-style; confirm this is the format
    # the model was fine-tuned on and that the tokenizer maps them to
    # special tokens.
    parts = [_format_turn("system", SYS)]
    parts.extend(
        _format_turn(role, text)
        for role, text in _iter_turns(history)
        if role in _REPLAYED_ROLES
    )
    parts.append(_format_turn("user", _content_to_text(message)))
    parts.append("<|im_start|>assistant\n")
    return "".join(parts)


def chat(message, history):
    """Generate the assistant's reply to *message* given the chat *history*.

    Args:
        message: The new user message.
        history: Previous turns, either as ``[user, assistant]`` pairs or as
            ``{"role", "content"}`` dicts; may be empty or ``None``.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        sliced off), with special tokens removed.
    """
    prompt = _build_prompt(message, history)
    inputs = tokenizer(prompt, return_tensors="pt")
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            temperature=TEMPERATURE,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # ``outputs[0]`` holds prompt + continuation; keep only the continuation.
    generated = outputs[0][prompt_length:]
    return tokenizer.decode(generated, skip_special_tokens=True)


demo = gr.ChatInterface(
    fn=chat,
    title="DeepSeek Python Pro",
    description="Fine-tuned Python coding assistant",
    examples=[
        "Write a Python function to reverse a linked list",
        "Explain decorators with an example",
        "Write a binary search in Python",
    ],
)

if __name__ == "__main__":
    demo.launch()