"""Gradio demo that answers free-text questions with a small causal LM.

At import time the script loads the checkpoint named by ``model_id`` and,
if loading succeeded, builds and launches a Gradio interface whose handler
is :func:`medical_qna`.

NOTE(review): the checkpoint name suggests a general dialogue model rather
than a medical one -- confirm it is the intended model. The UI description
already states the output is not real medical advice.
"""
import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "microsoft/DialoGPT-small"

# Both stay ``None`` when loading fails, so the rest of the script can detect
# the failure and degrade gracefully instead of raising NameError.
model = None
tokenizer = None

print(f"\nLoading model: {model_id}...")
try:
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)
    model.eval()  # inference only: switch off training-time behaviour
    print("Model loaded successfully!")
except Exception as e:
    # Top-level boundary: report the problem and keep going so the script
    # can print the "skipping interface" message below.
    print(f"Error loading model: {e}")
    print("Ensure you are using a CPU-compatible model and that internet access is available.")


def medical_qna(question):
    """Generate a Markdown-formatted reply to ``question``.

    Args:
        question: Text entered by the user in the Gradio textbox.

    Returns:
        A Markdown string with a fixed "Thinking Process" section followed
        by the model's sampled reply. If the model or tokenizer is missing,
        the question is empty, or generation raises, a plain message
        describing the situation is returned instead (this function does
        not raise).
    """
    if model is None or tokenizer is None:
        return "Error: Model failed to load in a previous step."

    # An empty prompt would make the model continue from a bare EOS token;
    # ask for input instead of returning an arbitrary sample.
    if not question or not question.strip():
        return "Please enter a question."

    try:
        # The question is terminated with the EOS token before encoding.
        # Calling the tokenizer (rather than ``encode``) also yields the
        # attention mask, which is passed to ``generate`` explicitly because
        # ``pad_token_id`` is set to the EOS id below.
        encoded = tokenizer(question + tokenizer.eos_token, return_tensors="pt")
        input_ids = encoded["input_ids"]
        attention_mask = encoded["attention_mask"]

        with torch.no_grad():
            output_ids = model.generate(
                input_ids,
                attention_mask=attention_mask,
                max_new_tokens=150,
                pad_token_id=tokenizer.eos_token_id,
                do_sample=True,
                top_k=50,
                top_p=0.95,
                temperature=0.7,
            )

        # Skip the first ``prompt_len`` positions (the prompt) so that only
        # the newly generated tokens of the single sequence are decoded.
        prompt_len = input_ids.shape[-1]
        generated_text = tokenizer.decode(
            output_ids[:, prompt_len:][0],
            skip_special_tokens=True,
        )

        # The "thinking" section is a fixed placeholder, not model output.
        thinking_process = "Analyzing symptoms and context..."
        final_response = generated_text.strip()

        formatted_output = (
            f"**Thinking Process:**\n{thinking_process}\n\n"
            f"**Final Response:**\n{final_response}"
        )
        return formatted_output
    except Exception as e:
        # Surface the failure in the UI rather than crashing the handler.
        return f"An error occurred during processing: {e}"


if model is not None and tokenizer is not None:
    print("\nSetting up Gradio interface...")
    try:
        interface = gr.Interface(
            fn=medical_qna,
            inputs=gr.Textbox(
                lines=3,
                placeholder="Enter medical question here...",
                label="Your Medical Question",
            ),
            outputs=gr.Markdown(label="Chatbot Response"),
            title="Lightweight Medical Chatbot",
            description="Ask a medical-style question. This uses a small model for CPU usage. Not for real medical advice.",
        )
        print("Launching Gradio interface...")
        interface.launch()
    except Exception as e:
        print(f"Error launching Gradio interface: {e}")
else:
    print("\nModel was not loaded. Skipping Gradio interface setup.")