"""Gradio voice chatbot backed by the MiniCPM-o 2.6 model, configured for CPU.

Loads the model and tokenizer at import time, tries to initialise the model's
text-to-speech stack, and exposes a single-textbox Gradio interface that
returns the model's text reply together with a synthesized audio file when
one was produced.
"""

import os

# This must be set before `transformers` is imported below, otherwise it
# cannot influence the library's import-time behaviour.
# NOTE(review): confirm the installed transformers version honours this
# variable; `attn_implementation="eager"` below is the explicit switch.
os.environ["TRANSFORMERS_NO_FLASH_ATTN"] = "1"

import tempfile  # noqa: E402  (imports intentionally follow the env setup)

import torch  # noqa: E402
import gradio as gr  # noqa: E402
from transformers import AutoModel, AutoTokenizer  # noqa: E402

MODEL_NAME = "openbmb/MiniCPM-o-2_6"

print("Loading model...")
model = AutoModel.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
    # Request the plain PyTorch attention path instead of flash-attention.
    attn_implementation="eager",
)
model = model.eval()

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
)

# Initialise TTS on a best-effort basis and remember whether it worked, so
# that chat() does not request audio the model cannot produce.
_tts_ready = False
try:
    model.init_tts()
except Exception as e:  # top-level boundary: degrade to text-only mode
    print("TTS not available:", e)
else:
    _tts_ready = True
    print("TTS ready")


def _extract_reply(result):
    """Return the reply text contained in whatever ``model.chat`` returned.

    Accepts a plain string, a mapping with a ``"text"`` key, or an object
    with a ``text`` attribute; anything else is converted with ``str``.
    ``None`` yields an empty string.
    """
    if isinstance(result, str):
        return result
    if isinstance(result, dict):
        candidate = result.get("text")
    else:
        candidate = getattr(result, "text", None)
    if isinstance(candidate, str):
        return candidate
    return "" if result is None else str(result)


def _has_audio(path):
    """Return True when *path* names an existing, non-empty file."""
    return bool(path) and os.path.isfile(path) and os.path.getsize(path) > 0


def _discard(path):
    """Remove *path* if it exists; cleanup is best-effort."""
    try:
        os.remove(path)
    except OSError:
        # Nothing useful can be done if the temp file is already gone or
        # cannot be removed; the reply itself is unaffected.
        pass


def chat(text):
    """Generate a reply (and, when TTS is available, speech) for *text*.

    Args:
        text: The user's message from the Gradio textbox.

    Returns:
        A ``(reply, audio_path)`` tuple. ``reply`` is the model's text reply,
        or the error message if generation failed. ``audio_path`` is the path
        of a WAV file written for this request, or ``None`` when no audio was
        produced.
    """
    if text is None or not str(text).strip():
        return "Please type a message.", None

    msgs = [
        {"role": "user", "content": [text]}
    ]

    chat_kwargs = {
        "msgs": msgs,
        "tokenizer": tokenizer,
        "max_new_tokens": 200,
        "temperature": 0.3,
    }

    audio_path = None
    if _tts_ready:
        # A unique file per request prevents concurrent requests from
        # overwriting each other and prevents a stale file from a previous
        # call being served as this call's audio.
        fd, audio_path = tempfile.mkstemp(prefix="minicpm_reply_", suffix=".wav")
        os.close(fd)
        # NOTE(review): the model card examples also pass
        # use_tts_template=True and sampling=True for speech generation;
        # confirm whether they are required here.
        chat_kwargs["generate_audio"] = True
        chat_kwargs["output_audio_path"] = audio_path

    try:
        result = model.chat(**chat_kwargs)
    except Exception as e:  # UI boundary: show the error instead of crashing
        print("Chat failed:", e)
        if audio_path is not None:
            _discard(audio_path)
        return str(e), None

    reply = _extract_reply(result)

    # Only hand a path to the audio component when audio was really written.
    if audio_path is not None and not _has_audio(audio_path):
        _discard(audio_path)
        audio_path = None

    return reply, audio_path


demo = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(label="Type message"),
    outputs=[
        gr.Textbox(label="Reply"),
        gr.Audio(label="Voice Output"),
    ],
    title="MiniCPM-o Voice Chatbot (CPU Stable)",
)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)