import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

MODEL_NAME = "Abeersherif/Medical_Homework2"

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """
    message: str -> latest user message
    history: list[[user, bot], ...] -> previous chat turns (default tuples mode)
    """
    # Build a simple text prompt (no chat template)
    conversation = f"System: {system_message}\n\n"
    for user_msg, bot_msg in history:
        conversation += f"User: {user_msg}\nAssistant: {bot_msg}\n"
    conversation += f"User: {message}\nAssistant:"

    result = pipe(
        conversation,
        max_new_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        do_sample=True,
    )[0]["generated_text"]

    # Keep only what the assistant said last
    if "Assistant:" in result:
        result = result.split("Assistant:")[-1]
    return result.strip()


chatbot = gr.ChatInterface(
    fn=respond,
    # ⚠️ use default history format (tuples), do NOT set type="messages" here
    additional_inputs=[
        gr.Textbox(
            "You are a helpful medical assistant. Answer concisely with brief reasoning.",
            label="System message",
        ),
        gr.Slider(1, 512, value=256, step=1, label="Max new tokens"),
        gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
    ],
)

if __name__ == "__main__":
    chatbot.launch()
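
# Note: the prompt above is assembled by hand because not every fine-tuned
# checkpoint ships a chat template. If this model's tokenizer does define one
# (an assumption -- check `tokenizer.chat_template` first), the equivalent
# prompt could instead be built with `apply_chat_template`, roughly:
#
#     messages = [{"role": "system", "content": system_message}]
#     for user_msg, bot_msg in history:
#         messages.append({"role": "user", "content": user_msg})
#         messages.append({"role": "assistant", "content": bot_msg})
#     messages.append({"role": "user", "content": message})
#     prompt = tokenizer.apply_chat_template(
#         messages, tokenize=False, add_generation_prompt=True
#     )
#
# This is a sketch, not a drop-in replacement: the "Assistant:" split in
# respond() would also need to change, since templated output won't contain
# that literal marker.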