Spaces:
No application file
No application file
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import torch | |
| import json | |
# Module-level singletons, filled in by load_model(), so the weights are
# loaded once instead of on every request.
model = tokenizer = None
def load_model():
    """Load the tokenizer and model into the module-level globals.

    Both objects are loaded into locals first and published to ``model`` /
    ``tokenizer`` only after *both* loads succeeded, so a failure half-way
    through never leaves a tokenizer without a matching model.

    Failures are best-effort: the error is printed and swallowed, and the
    globals keep their previous values. Callers should check
    ``model is None`` before using the globals.
    """
    global model, tokenizer
    model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
    try:
        loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
        loaded_model = AutoModelForCausalLM.from_pretrained(
            model_name,
            # Half precision to reduce the memory footprint of the weights.
            # NOTE(review): float16 on CPU may be slow or unsupported on
            # older PyTorch builds -- confirm on the target hardware.
            torch_dtype=torch.float16,
            device_map="auto",  # automatic device placement
            low_cpu_mem_usage=True,  # reduce peak CPU memory while loading
        )
    except Exception as e:
        print(f"模型加载失败: {e}")
        return

    # Publish both objects together, only once everything has loaded.
    tokenizer = loaded_tokenizer
    model = loaded_model
    print("模型加载成功!")
def openai_compatible_api(message, history):
    """Generate a reply to ``message`` and wrap it in an OpenAI-style dict.

    Args:
        message: The latest user message; interpolated into the prompt.
        history: Earlier conversation turns as supplied by the caller.
            NOTE: currently not folded into the prompt, so every call is
            effectively single-turn.

    Returns:
        A dict shaped like
        ``{"choices": [{"message": {"role": "assistant", "content": text}}]}``.

    Raises:
        RuntimeError: If the model or tokenizer is still unavailable after
            attempting to load it.
    """
    if model is None or tokenizer is None:
        load_model()
        # load_model() reports failures without raising, so check again and
        # fail with a clear message instead of calling None further down.
        if model is None or tokenizer is None:
            raise RuntimeError("Model failed to load; cannot generate a response.")

    # Simplified single-turn prompt. Adjust it to the model's own chat
    # template if better-formatted conversations are required.
    prompt = f"\n\nHuman: {message}\n\nAssistant:"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
    # The weights may have been placed on an accelerator at load time; the
    # input tensors must live on the same device as the model.
    inputs = inputs.to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.95,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns the prompt followed by the continuation. Decode only
    # the newly generated tokens rather than splitting the decoded text on
    # "Assistant:", which would truncate replies containing that marker.
    new_tokens = outputs[0][prompt_length:]
    generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    # OpenAI-compatible response envelope.
    return {
        "choices": [{
            "message": {
                "role": "assistant",
                "content": generated_text,
            }
        }]
    }
# Load the model eagerly before the Gradio interface starts. This is
# optional: openai_compatible_api() also loads it on first use if needed.
load_model()


def _chat_reply(message, history):
    """Return only the assistant text from the OpenAI-style response dict.

    gr.ChatInterface expects its callback to return a chat message (a plain
    string), not the ``{"choices": [...]}`` envelope that
    openai_compatible_api() produces.
    """
    completion = openai_compatible_api(message, history)
    return completion["choices"][0]["message"]["content"]


# Chat interface backed by the model; Gradio also exposes it as an API.
demo = gr.ChatInterface(
    fn=_chat_reply,
    title="DeepSeek API Service",
    description="OpenAI-compatible API for DeepSeek-R1",
)

# Listen on all interfaces on port 7860 with the API docs enabled. No
# ``share`` argument is passed, so Gradio's default for public links applies.
if __name__ == "__main__":
    demo.launch(show_api=True, server_name="0.0.0.0", server_port=7860)