| import os | |
| from huggingface_hub import hf_hub_download | |
| from llama_cpp import Llama | |
| import gradio as gr | |
# Make sure the directory used to store the model exists.
model_dir = "/home/user/models"
os.makedirs(model_dir, exist_ok=True)

# Download the GGUF model file from the Hugging Face Hub, using model_dir as
# the cache directory; the returned value is passed on as the model path.
model_path = hf_hub_download(
    repo_id="YLX1965/medical-model",
    filename="unsloth.Q8_0.gguf",
    cache_dir=model_dir,
)

# Load the GGUF model through llama-cpp-python.
llm = Llama(model_path=model_path)
# Chat function: turns a user prompt into generated text.
def chat(prompt: str, max_tokens: int = 200) -> str:
    """Generate a completion for ``prompt`` using the module-level ``llm``.

    Args:
        prompt: Text entered by the user.
        max_tokens: Upper bound on the number of tokens to generate.
            Defaults to 200, the value that used to be hard-coded.

    Returns:
        The text of the first completion choice, or an empty string when
        ``prompt`` is empty or contains only whitespace.
    """
    # A blank prompt gives the model nothing to condition on, so skip the
    # inference call rather than return text unrelated to any user input.
    if not prompt or not prompt.strip():
        return ""
    output = llm(prompt, max_tokens=max_tokens)
    return output["choices"][0]["text"]
# Run Gradio: build a text-in / text-out interface around chat and launch it.
interface = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
)
interface.launch()