Spaces:

openbmb
/

UltraData-Math-L3-Generator

Running

App Files Files Community

UltraData-Math-L3-Generator / app.py

chuyue

fix: use correct API endpoint and model (GLM_ar7snd)

968a16c 3 months ago

raw

history blame

12.9 kB

	# -*- coding: utf-8 -*-
	"""
	UltraData-Math L3 Generator - Hugging Face Space Demo
	"""

	import os
	import asyncio
	import json
	import gradio as gr

	from openai import AsyncOpenAI

	from qa_synthesis import QA_PROMPTS, get_qa_prompt
	from conversation_synthesis import CONVERSATION_PROMPTS, get_conversation_prompt
	from multistyle_rewrite import MULTISTYLE_PROMPTS, get_multistyle_prompt
	from knowledge_textbook import (
	get_knowledge_extraction_prompt,
	get_textbook_exercise_prompt,
	TEXTBOOK_EXERCISE_PROMPTS,
	)
	from run_synthesis import (
	parse_qa_output,
	parse_conversation_output,
	parse_rewrite_output,
	parse_knowledge_output,
	parse_textbook_output,
	)

	# API configuration is read from environment variables (set via HF Secrets).
	# API_KEY has no fallback, so it is None when OPENAI_API_KEY is unset.
	API_KEY = os.getenv("OPENAI_API_KEY")
	# BASE_URL falls back to the hard-coded endpoint below when OPENAI_BASE_URL is unset.
	BASE_URL = os.getenv("OPENAI_BASE_URL", "https://llm-center.ali.modelbest.cn/llm/openai/v1")
	# Model name used by call_api when the caller does not pass one.
	DEFAULT_MODEL = "GLM_ar7snd"


	async def call_api(prompt: str, model: str = DEFAULT_MODEL, temperature: float = 0.7) -> str:
	    """Send a single-turn chat completion request and return the reply text.

	    Args:
	        prompt: Text sent as the only user message.
	        model: Model name passed to the endpoint.
	        temperature: Sampling temperature passed to the endpoint.

	    Returns:
	        The reply's ``content``; if that is empty, its ``reasoning_content``
	        when the message carries one; otherwise ``""``. Failures (including
	        a missing API key) are reported as a string starting with
	        ``"Error: "`` rather than raised.
	    """
	    if not API_KEY:
	        return "Error: API Key not configured. Please contact administrator."

	    client = AsyncOpenAI(api_key=API_KEY, base_url=BASE_URL)
	    try:
	        # A fresh client is built for every call, so close its HTTP
	        # connections on exit instead of leaving them to garbage collection.
	        async with client:
	            response = await client.chat.completions.create(
	                model=model,
	                messages=[{"role": "user", "content": prompt}],
	                temperature=temperature,
	                max_tokens=8192,
	            )
	        # Handle the reply format of reasoning models: when ``content`` is
	        # empty, fall back to ``reasoning_content`` if the message has it.
	        message = response.choices[0].message
	        content = message.content
	        if not content:
	            content = getattr(message, "reasoning_content", None)
	        return content or ""
	    except Exception as e:
	        # Deliberate best-effort: callers display the returned string, so
	        # every failure is turned into text instead of propagating.
	        return f"Error: {str(e)}"


	def run_async(coro):
	    """Run a coroutine to completion from synchronous code and return its result.

	    ``asyncio.run`` is used rather than ``asyncio.get_event_loop()`` plus
	    ``run_until_complete``: it creates a fresh event loop for the call and
	    closes it afterwards, whereas ``get_event_loop()`` is deprecated when no
	    loop is running and any loop created as a fallback would stay open.

	    Args:
	        coro: The coroutine object to execute.

	    Returns:
	        Whatever the coroutine returns. Exceptions raised by the coroutine
	        propagate to the caller.

	    Raises:
	        RuntimeError: If the calling thread already has a running event loop.
	    """
	    return asyncio.run(coro)


	# ============================================================================
	# Task Handlers
	# ============================================================================

	def qa_synthesis(text: str, level: str, model: str, temperature: float):
	    """Generate a question/answer pair from the given mathematical content.

	    Args:
	        text: Source content substituted into the prompt template.
	        level: Key passed to ``get_qa_prompt`` to select the template.
	        model: Model name forwarded to ``call_api``.
	        temperature: Sampling temperature forwarded to ``call_api``.

	    Returns:
	        A ``(problem, solution, raw_response)`` tuple. Blank input yields
	        three empty strings without calling the API; fields missing from
	        the parsed output default to ``""``.
	    """
	    has_content = bool(text.strip())
	    if not has_content:
	        return "", "", ""

	    filled_prompt = get_qa_prompt(level).format(text=text)
	    raw_reply = run_async(call_api(filled_prompt, model, temperature))

	    fields = parse_qa_output(raw_reply)
	    problem = fields.get("problem", "")
	    solution = fields.get("solution", "")
	    return problem, solution, raw_reply


	def conversation_synthesis(text: str, style: str, model: str, temperature: float):
	    """Turn the given mathematical content into a multi-turn conversation.

	    Args:
	        text: Source content substituted into the prompt template.
	        style: Key passed to ``get_conversation_prompt`` to select the template.
	        model: Model name forwarded to ``call_api``.
	        temperature: Sampling temperature forwarded to ``call_api``.

	    Returns:
	        A ``(conversation, raw_response)`` tuple. The first element is the
	        parsed ``"content"`` field, or the raw response when that field is
	        absent. Blank input yields ``("", "")`` without calling the API.
	    """
	    if text.strip():
	        filled_prompt = get_conversation_prompt(style).format(text=text)
	        raw_reply = run_async(call_api(filled_prompt, model, temperature))
	        conversation = parse_conversation_output(raw_reply).get("content", raw_reply)
	        return conversation, raw_reply

	    return "", ""


	def rewrite_synthesis(text: str, style: str, model: str, temperature: float):
	    """Rewrite the given mathematical content in the requested style.

	    Args:
	        text: Source content substituted into the prompt template.
	        style: Key passed to ``get_multistyle_prompt`` to select the template.
	        model: Model name forwarded to ``call_api``.
	        temperature: Sampling temperature forwarded to ``call_api``.

	    Returns:
	        A ``(rewritten, raw_response)`` tuple. The first element is the
	        parsed ``"rewritten"`` field, or the raw response when that field
	        is absent. Blank input yields ``("", "")`` without calling the API.
	    """
	    stripped = text.strip()
	    if stripped == "":
	        return "", ""

	    template = get_multistyle_prompt(style)
	    raw_reply = run_async(call_api(template.format(text=text), model, temperature))
	    result = parse_rewrite_output(raw_reply)
	    rewritten = result.get("rewritten", raw_reply)
	    return rewritten, raw_reply


	def knowledge_extraction(text: str, model: str, temperature: float):
	    """Extract knowledge points from the given mathematical content.

	    Args:
	        text: Source content substituted into the extraction prompt.
	        model: Model name forwarded to ``call_api``.
	        temperature: Sampling temperature forwarded to ``call_api``.

	    Returns:
	        A ``(formatted, raw_response)`` tuple. ``formatted`` joins the
	        parsed ``"knowledge_points"`` entries with a ``---`` separator, or
	        is a fixed placeholder message when there are none. Blank input
	        yields ``("", "")`` without calling the API.
	    """
	    if not text.strip():
	        return "", ""

	    filled_prompt = get_knowledge_extraction_prompt().format(text=text)
	    raw_reply = run_async(call_api(filled_prompt, model, temperature))
	    points = parse_knowledge_output(raw_reply).get("knowledge_points", [])

	    if points:
	        summary = "\n\n---\n\n".join(points)
	    else:
	        summary = "No knowledge points extracted."
	    return summary, raw_reply


	def textbook_exercise(knowledge_point: str, difficulty: str, model: str, temperature: float):
	    """Generate textbook-style exercise material for a knowledge point.

	    Args:
	        knowledge_point: Text substituted into the template's
	            ``mathematical_knowledge_point`` placeholder.
	        difficulty: Key passed to ``get_textbook_exercise_prompt``.
	        model: Model name forwarded to ``call_api``.
	        temperature: Sampling temperature forwarded to ``call_api``.

	    Returns:
	        A ``(material, raw_response)`` tuple. The first element is the
	        parsed ``"material"`` field, or the raw response when that field is
	        absent. Blank input yields ``("", "")`` without calling the API.
	    """
	    if knowledge_point.strip():
	        template = get_textbook_exercise_prompt(difficulty)
	        filled_prompt = template.format(mathematical_knowledge_point=knowledge_point)
	        raw_reply = run_async(call_api(filled_prompt, model, temperature))
	        material = parse_textbook_output(raw_reply).get("material", raw_reply)
	        return material, raw_reply

	    return "", ""


	# ============================================================================
	# Gradio UI
	# ============================================================================

	# Stylesheet handed to gr.Blocks(css=...): page font and gradient background,
	# the gradient-filled title, the subtitle, primary-button look and hover
	# effect, and a rule that hides the <footer> element.
	custom_css = """
	.gradio-container {
	font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
	background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%) !important;
	}

	.main-title {
	font-weight: 700 !important;
	font-size: 2.2rem !important;
	background: linear-gradient(90deg, #e94560, #f39c12, #00d9ff) !important;
	-webkit-background-clip: text !important;
	-webkit-text-fill-color: transparent !important;
	background-clip: text !important;
	text-align: center !important;
	}

	.subtitle {
	text-align: center !important;
	color: #94a3b8 !important;
	font-size: 1rem !important;
	margin-bottom: 1.5rem !important;
	}

	.gr-button-primary {
	background: linear-gradient(135deg, #e94560 0%, #f39c12 100%) !important;
	border: none !important;
	font-weight: 600 !important;
	}

	.gr-button-primary:hover {
	transform: translateY(-2px) !important;
	box-shadow: 0 8px 25px rgba(233, 69, 96, 0.4) !important;
	}

	footer {
	display: none !important;
	}
	"""

	# Models offered in the dropdown. DEFAULT_MODEL is added if it is missing so
	# the preselected value is always one of the listed choices.
	_model_choices = ["GLM_ar7snd", "GLM_pq0dvd", "GLM_35a7cn", "QWEN_czrd3t", "DEEPSEEK_5jcwxs"]
	if DEFAULT_MODEL not in _model_choices:
	    _model_choices.insert(0, DEFAULT_MODEL)

	# Page layout: shared model/temperature controls followed by one tab per task.
	# Components are created in display order, so statement order matters here.
	with gr.Blocks(title="UltraData-Math L3 Generator", css=custom_css) as demo:
	    gr.HTML('<h1 class="main-title">🧮 UltraData-Math L3 Generator</h1>')
	    gr.HTML('<p class="subtitle">LLM-based Mathematical Data Synthesis Tool</p>')

	    # Controls shared by every tab; each click handler receives both values.
	    with gr.Row():
	        model_select = gr.Dropdown(
	            choices=_model_choices,
	            # Preselect the same model call_api defaults to, so the UI and
	            # the API default cannot drift apart when the constant changes.
	            value=DEFAULT_MODEL,
	            label="Model",
	            scale=1,
	        )
	        temperature = gr.Slider(
	            minimum=0.0, maximum=1.5, value=0.7, step=0.1,
	            label="Temperature",
	            scale=1,
	        )

	    with gr.Tabs():
	        # Q&A Synthesis Tab
	        with gr.TabItem("📝 Q&A Synthesis"):
	            gr.Markdown("根据数学内容生成问答对，按教育难度分级。")
	            with gr.Row():
	                with gr.Column():
	                    qa_input = gr.Textbox(
	                        label="Input Mathematical Content",
	                        placeholder="Enter mathematical content here...",
	                        lines=8,
	                    )
	                    qa_level = gr.Radio(
	                        choices=list(QA_PROMPTS.keys()),
	                        value="high_school",
	                        label="Difficulty Level",
	                    )
	                    qa_btn = gr.Button("🚀 Generate Q&A", variant="primary")
	                with gr.Column():
	                    qa_problem = gr.Textbox(label="Generated Problem", lines=4)
	                    qa_solution = gr.Textbox(label="Generated Solution", lines=8)
	                    # The raw model response is kept in a hidden textbox.
	                    qa_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)

	            qa_btn.click(
	                qa_synthesis,
	                inputs=[qa_input, qa_level, model_select, temperature],
	                outputs=[qa_problem, qa_solution, qa_raw],
	            )

	        # Conversation Synthesis Tab
	        with gr.TabItem("💬 Conversation Synthesis"):
	            gr.Markdown("将数学内容转换为多轮对话格式。")
	            with gr.Row():
	                with gr.Column():
	                    conv_input = gr.Textbox(
	                        label="Input Mathematical Content",
	                        placeholder="Enter mathematical content here...",
	                        lines=8,
	                    )
	                    conv_style = gr.Radio(
	                        choices=list(CONVERSATION_PROMPTS.keys()),
	                        value="teacher_student",
	                        label="Conversation Style",
	                    )
	                    conv_btn = gr.Button("🚀 Generate Conversation", variant="primary")
	                with gr.Column():
	                    conv_output = gr.Textbox(label="Generated Conversation", lines=15)
	                    conv_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)

	            conv_btn.click(
	                conversation_synthesis,
	                inputs=[conv_input, conv_style, model_select, temperature],
	                outputs=[conv_output, conv_raw],
	            )

	        # Rewrite Tab
	        with gr.TabItem("✨ Multi-style Rewrite"):
	            gr.Markdown("将数学内容改写为不同风格。")
	            with gr.Row():
	                with gr.Column():
	                    rewrite_input = gr.Textbox(
	                        label="Input Mathematical Content",
	                        placeholder="Enter mathematical content here...",
	                        lines=8,
	                    )
	                    rewrite_style = gr.Radio(
	                        choices=list(MULTISTYLE_PROMPTS.keys()),
	                        value="textbook",
	                        label="Rewrite Style",
	                    )
	                    rewrite_btn = gr.Button("🚀 Rewrite", variant="primary")
	                with gr.Column():
	                    rewrite_output = gr.Textbox(label="Rewritten Content", lines=15)
	                    rewrite_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)

	            rewrite_btn.click(
	                rewrite_synthesis,
	                inputs=[rewrite_input, rewrite_style, model_select, temperature],
	                outputs=[rewrite_output, rewrite_raw],
	            )

	        # Knowledge Extraction Tab
	        with gr.TabItem("📚 Knowledge Extraction"):
	            gr.Markdown("从数学内容中提取定义、定理、性质等知识点。")
	            with gr.Row():
	                with gr.Column():
	                    know_input = gr.Textbox(
	                        label="Input Mathematical Content",
	                        placeholder="Enter mathematical content here...",
	                        lines=10,
	                    )
	                    know_btn = gr.Button("🚀 Extract Knowledge", variant="primary")
	                with gr.Column():
	                    know_output = gr.Textbox(label="Extracted Knowledge Points", lines=15)
	                    know_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)

	            know_btn.click(
	                knowledge_extraction,
	                inputs=[know_input, model_select, temperature],
	                outputs=[know_output, know_raw],
	            )

	        # Textbook Exercise Tab
	        with gr.TabItem("📖 Textbook Exercise"):
	            gr.Markdown("基于知识点生成不同难度的教材式练习。")
	            with gr.Row():
	                with gr.Column():
	                    textbook_input = gr.Textbox(
	                        label="Input Knowledge Point",
	                        placeholder="Enter a mathematical knowledge point...",
	                        lines=6,
	                    )
	                    textbook_diff = gr.Radio(
	                        choices=list(TEXTBOOK_EXERCISE_PROMPTS.keys()),
	                        value="easy",
	                        label="Difficulty",
	                    )
	                    textbook_btn = gr.Button("🚀 Generate Exercise", variant="primary")
	                with gr.Column():
	                    textbook_output = gr.Textbox(label="Generated Exercise Material", lines=15)
	                    textbook_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)

	            textbook_btn.click(
	                textbook_exercise,
	                inputs=[textbook_input, textbook_diff, model_select, temperature],
	                outputs=[textbook_output, textbook_raw],
	            )

	    # Footer markup; the literal's lines are left unindented so its contents
	    # stay exactly as they were.
	    gr.HTML("""
	<div style="text-align: center; margin-top: 2rem; padding: 1rem; color: #64748b; font-size: 0.85rem;">
	<p>🔬 <strong>UltraData-Math L3 Generator</strong> - Part of the UltraData-Math Project</p>
	<p>LLM-based data synthesis for Q&A, conversations, rewriting, and more.</p>
	</div>
	""")


	if __name__ == "__main__":
	    # Start the app only when run as a script; server-side rendering is
	    # explicitly turned off for this launch.
	    demo.launch(ssr_mode=False)