import os
import tempfile

# Set before transformers is imported so the value is already in the
# environment when that import runs.
# NOTE(review): confirm the installed transformers version honors this variable.
os.environ["TRANSFORMERS_NO_FLASH_ATTN"] = "1"

import gradio as gr  # noqa: E402
import torch  # noqa: E402
from transformers import AutoModel, AutoTokenizer  # noqa: E402
|
|
# Model identifier handed to from_pretrained for both the model and tokenizer.
MODEL_NAME = "openbmb/MiniCPM-o-2_6"

print("Loading model...")

# Keyword arguments forwarded to AutoModel.from_pretrained.
_MODEL_LOAD_OPTIONS = {
    "trust_remote_code": True,
    "torch_dtype": torch.float32,
    "low_cpu_mem_usage": True,
    "attn_implementation": "eager",
}

# Load the model and keep the object returned by .eval().
model = AutoModel.from_pretrained(MODEL_NAME, **_MODEL_LOAD_OPTIONS).eval()

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
|
|
| |
# Text-to-speech setup is best-effort: a failure is reported on stdout and
# start-up continues.
try:
    model.init_tts()
except Exception as tts_error:
    print("TTS not available:", tts_error)
else:
    print("TTS ready")
|
|
def _keep_audio_if_written(path):
    """Return *path* if it is a non-empty file; otherwise delete it and return None."""
    try:
        if os.path.getsize(path) > 0:
            return path
    except OSError:
        # The file is gone or unreadable, so there is nothing to hand back.
        return None
    try:
        os.remove(path)
    except OSError:
        pass  # Best-effort cleanup of the empty placeholder.
    return None


def chat(text):
    """Generate a text reply, plus spoken audio when available, for one message.

    Args:
        text: The user's message.

    Returns:
        A ``(reply, audio_path)`` tuple. ``reply`` is the model's text answer,
        a short prompt when ``text`` is blank, or the exception message if
        generation failed. ``audio_path`` is the WAV file written for this
        request, or ``None`` when no audio was produced.
    """
    if not text or not text.strip():
        # Nothing to answer; do not call the model at all.
        return "Please enter a message.", None

    msgs = [{"role": "user", "content": [text]}]

    # One fresh file per request: with a fixed name, a request for which the
    # model writes nothing would hand back the previous request's audio, and
    # overlapping requests would overwrite each other's output.
    fd, audio_path = tempfile.mkstemp(prefix="minicpm_reply_", suffix=".wav")
    os.close(fd)

    try:
        # NOTE(review): the MiniCPM-o model card examples also pass
        # use_tts_template=True when generating audio; confirm whether it is
        # needed here.
        result = model.chat(
            msgs=msgs,
            tokenizer=tokenizer,
            generate_audio=True,
            output_audio_path=audio_path,
            max_new_tokens=200,
            temperature=0.3,
        )
    except Exception as e:
        # UI boundary: surface the failure in the reply box instead of raising.
        _keep_audio_if_written(audio_path)
        return str(e), None

    if isinstance(result, str):
        reply = result
    else:
        # A non-string result must not be replaced by the user's own message.
        # Use its ``text`` attribute when it has one, else its string form.
        reply = getattr(result, "text", None)
        if not isinstance(reply, str):
            reply = str(result)

    # Only return a path when audio was actually written to it.
    return reply, _keep_audio_if_written(audio_path)
|
|
|
|
# Widgets for the chat form: one text input, a text reply and an audio player.
_message_input = gr.Textbox(label="Type message")
_reply_outputs = [
    gr.Textbox(label="Reply"),
    gr.Audio(label="Voice Output"),
]

# chat() returns two values, which fill the two outputs in order.
demo = gr.Interface(
    fn=chat,
    inputs=_message_input,
    outputs=_reply_outputs,
    title="MiniCPM-o Voice Chatbot (CPU Stable)",
)

# Serve on all network interfaces, port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)