import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
model_path = "ibm-granite/granite-4.0-h-350M"

# Load the model and tokenizer once at startup
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)
model.eval()


def respond(message, history, max_new_tokens, temperature):
    """
    message: the latest user message
    history: list of previous messages as {"role": ..., "content": ...} dicts
    """
    history = history or []

    # Convert the message history to chat format, keeping both user and
    # assistant turns so the model sees the full conversation
    chat = [{"role": h["role"], "content": h["content"]} for h in history]
    chat.append({"role": "user", "content": message})

    chat_text = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    input_tokens = tokenizer(chat_text, return_tensors="pt").to(device)

    # Generate the reply; temperature only has an effect with sampling enabled
    with torch.no_grad():
        output_tokens = model.generate(
            **input_tokens,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
        )

    # Decode only the newly generated tokens, not the echoed prompt
    new_tokens = output_tokens[0][input_tokens["input_ids"].shape[1]:]
    output_text = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # gr.ChatInterface manages the history itself, so return only the reply
    return output_text


# Gradio chat interface
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        gr.Slider(minimum=1, maximum=1024, value=200, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.05, label="Temperature"),
    ],
)

with gr.Blocks() as demo:
    chatbot.render()

if __name__ == "__main__":
    demo.launch()
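

# ---------------------------------------------------------------------------
# Optional streaming variant (a reference sketch, not part of the original
# app). It reuses the `model`, `tokenizer`, and `device` globals defined
# above; `respond_stream` is an illustrative name. TextIteratorStreamer is
# the standard transformers helper for token-by-token streaming. To use it,
# move this definition above the ChatInterface and pass it in place of
# `respond`; Gradio treats generator functions as streaming responses and
# re-renders each yielded string as the growing reply.
# ---------------------------------------------------------------------------
import threading

from transformers import TextIteratorStreamer


def respond_stream(message, history, max_new_tokens, temperature):
    # Build the chat exactly as in respond() above
    chat = [{"role": h["role"], "content": h["content"]} for h in (history or [])]
    chat.append({"role": "user", "content": message})
    chat_text = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    input_tokens = tokenizer(chat_text, return_tensors="pt").to(device)

    # The streamer yields decoded text pieces as generate() produces tokens;
    # skip_prompt drops the echoed input, skip_special_tokens drops EOS etc.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **input_tokens,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
    )

    # generate() blocks, so run it in a background thread and consume the
    # streamer in the foreground, yielding progressively longer replies
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial = ""
    for piece in streamer:
        partial += piece
        yield partial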