# Source: HuggingFace Spaces — zhman/llama-math-solver
# Space status at capture time: Sleeping
# File size: 4,771 Bytes

"""
HuggingFace Spaces 推理应用
使用 Gradio 创建交互式界面
"""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Model configuration: the checkpoint identifier passed to from_pretrained().
MODEL_NAME = "zhman/llama-SFT-GRPO"

# Load the tokenizer and the model (CPU-oriented setup).
# These statements run at import time, so importing this module triggers the load.
print("🔄 加载模型...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# CPU mode: load weights as float32 and do not use quantization.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,  # CPU-friendly data type
    low_cpu_mem_usage=True,     # reduce memory usage while loading
)
print("✅ 模型加载完成！")


# Substrings that mark leaked training-format lines; any answer line containing
# one of them is dropped from the final output.
_CLEANUP_KEYWORDS = (
    "运算符", "运算方法", "运算公式", "运算步骤",
    "左值", "右值", "中值", "结果值",
    "Step", "步骤编号",
)

# Hard cap on generated tokens, kept low to speed up generation.
_MAX_NEW_TOKENS_CAP = 256

# Message returned when the user submits an empty question.
_EMPTY_QUESTION_MESSAGE = "请输入一个数学问题。"


def _clean_answer(answer, max_lines=3, fallback_chars=200):
    """Drop blank and training-format lines from ``answer``; keep the first lines.

    If every line is filtered out, fall back to the first ``fallback_chars``
    characters of the unfiltered answer so the user still sees something.
    """
    kept = [
        line
        for line in (raw.strip() for raw in answer.split("\n"))
        if line and not any(keyword in line for keyword in _CLEANUP_KEYWORDS)
    ]
    if kept:
        return "\n".join(kept[:max_lines])
    return answer[:fallback_chars]


def solve_math_problem(
    question: str,
    max_length: int = 512,
    temperature: float = 0.7,
    top_p: float = 0.9,
) -> str:
    """Generate an answer to a math question with the module-level model.

    Args:
        question: The math question text.
        max_length: Requested maximum number of new tokens. The effective
            value is capped at 256 to keep generation time down.
        temperature: Sampling temperature. A value of 0 (or below) switches
            to greedy decoding instead of sampling.
        top_p: Nucleus-sampling threshold; only used when sampling.

    Returns:
        The cleaned answer text (at most three lines), or a short prompt
        message if ``question`` is empty or whitespace-only.
    """
    # Nothing to solve: avoid running a full generation on an empty prompt.
    if not question or not question.strip():
        return _EMPTY_QUESTION_MESSAGE

    # Build the prompt.
    prompt = f"问题：{question}\n答案："

    # Encode the input.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # UI sliders may deliver ints or floats; normalize before use.
    max_new_tokens = max(1, min(int(max_length), _MAX_NEW_TOKENS_CAP))
    temperature = float(temperature)
    top_p = float(top_p)

    # Some tokenizers define no pad token; fall back to EOS so generate()
    # receives an explicit padding id instead of None.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "pad_token_id": pad_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )
    else:
        # Sampling requires a strictly positive temperature; treat 0 as a
        # request for deterministic (greedy) decoding.
        generation_kwargs["do_sample"] = False

    # Generate the answer.
    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_kwargs)

    # Decode only the newly generated tokens. This avoids string-matching the
    # prompt, which breaks if the question itself contains the answer marker
    # or if decoding does not reproduce the prompt text exactly.
    new_tokens = outputs[0][prompt_length:]
    answer = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    return _clean_answer(answer)


# Build the Gradio interface.
# The four input components map positionally onto solve_math_problem's
# parameters: question, max_length, temperature, top_p.
demo = gr.Interface(
    fn=solve_math_problem,
    inputs=[
        gr.Textbox(
            label="💬 请输入您的数学问题",
            placeholder="例如：一个长方形的长是8厘米，宽是5厘米，它的周长是多少？",
            lines=3
        ),
        # NOTE: solve_math_problem caps generation at 256 new tokens, so slider
        # values above 256 do not produce longer output.
        gr.Slider(
            minimum=50,
            maximum=512,
            value=256,  # lower default to speed up CPU inference
            step=50,
            label="📏 最大长度"
        ),
        gr.Slider(
            minimum=0.0,
            maximum=2.0,
            value=0.7,
            step=0.1,
            label="🌡️ Temperature"
        ),
        gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=0.9,
            step=0.05,
            label="🎯 Top P"
        )
    ],
    outputs=gr.Textbox(
        label="✨ AI 回答",
        lines=5
    ),
    title="🧮 数学问题求解 AI",
    description="""
    基于 Llama-3.2-1B-Instruct 微调的数学问题求解模型。
    
    **使用方法**：
    1. 在输入框中输入您的数学问题
    2. 调整推理参数（可选）
    3. 点击 Submit 获取答案
    
    **模型信息**：
    - 基础模型：Llama-3.2-1B-Instruct
    - 微调任务：数学推理和问题求解
    """,
    # Each example row supplies values in input order:
    # [question, max length, temperature, top_p].
    examples=[
        ["Find the positive integer $n$ such that $10^n$ cubic centimeters is the same as 1 cubic kilometer.", 256, 0.7, 0.9],
        ["Define an operation $\\Diamond$ as $ a \\Diamond b = 12a - 10b.$ Compute the value of $((((20 \\Diamond 22) \\Diamond 22) \\Diamond 22) \\Diamond22).$", 256, 0.7, 0.9],
        ["S1.1 Let $a, b, c$ and $d$ be the distinct roots of the equation $x^{4}-15 x^{2}+56=0$. If $R=a^{2}+b^{2}+c^{2}+d^{2}$, find the value of $R$.", 256, 0.7, 0.9]
    ],
    cache_examples=False,  # disable example caching to avoid hanging at startup
    theme=gr.themes.Soft()
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()