Spaces:

sshenai
/

DPLproject

Runtime error

App Files Files Community

DPLproject / app.py

sshenai

Update app.py

bfac0f6 verified 8 months ago

raw

history blame contribute delete

4.36 kB

	# -- coding: utf-8 --
	"""
	鸟类知识科普系统（Qwen3优化版） by [你的名字]
	ISOM5240 Group Project
	"""


	import transformers
	import os
	import torch
	import gradio as gr
	from PIL import Image
	from transformers import (
	pipeline,
	AutoConfig,
	AutoImageProcessor
	)

	# ========== 环境配置 ==========
	os.environ["TRANSFORMERS_CACHE"] = "./model_cache"
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
	print(f"当前设备: {DEVICE.upper()}")

	# ========== 模型初始化 ==========
	def init_models():
	"""统一模型加载逻辑"""
	try:
	# 1. 鸟类分类模型（显式配置图像处理器）
	image_processor = AutoImageProcessor.from_pretrained(
	"chriamue/bird-species-classifier",
	use_fast=True # 强制启用快速模式
	)
	classifier = pipeline(
	task="image-classification",
	model="chriamue/bird-species-classifier",
	feature_extractor=image_processor,
	device=DEVICE
	)

	# 2. 文本生成模型（解决revision参数冲突）
	qwen_config = AutoConfig.from_pretrained(
	"Qwen/Qwen-7B-Chat",
	trust_remote_code=True
	)
	text_generator = pipeline(
	task="text-generation",
	model="Qwen/Qwen-7B-Chat",
	config=qwen_config,
	torch_dtype=torch.bfloat16,
	device_map="auto",
	model_kwargs={
	"cache_dir": "./model_cache",
	"revision": "main" # 唯一指定版本
	}
	)

	# 3. 语音合成模型
	tts = pipeline(
	task="text-to-speech",
	model="facebook/mms-tts-eng",
	device=DEVICE
	)

	return classifier, text_generator, tts

	except Exception as e:
	raise RuntimeError(f"模型加载失败: {str(e)}")

	# ========== 核心逻辑 ==========
	def generate_child_friendly_text(bird_name):
	"""生成儿童友好型描述（优化prompt工程）"""
	prompt = f"""用6-12岁儿童能理解的语言描述{bird_name}：
	★ 使用比喻（例如：羽毛像彩虹糖纸）
	★ 包含趣味冷知识（例如：每天吃自身体重30%的食物）
	★ 语句简短（每句不超过15个英文单词）
	★ 避免专业术语"""

	try:
	response = text_generator(
	prompt,
	max_new_tokens=120,
	temperature=0.7,
	top_p=0.9,
	do_sample=True
	)
	return response[0]['generated_text'].split("描述")[-1].strip()
	except Exception as e:
	return f"文本生成失败: {str(e)}"

	def process_image(img):
	"""端到端处理流程"""
	try:
	# 1. 图像分类
	classification = classifier(img)
	bird_name = classification[0]['label']

	# 2. 生成描述
	description = generate_child_friendly_text(bird_name)

	# 3. 语音合成
	speech = tts(description, forward_params={"speaker_id": 6})

	return bird_name, description, speech["audio"]

	except Exception as e:
	return "错误", f"处理失败: {str(e)}", None

	# ========== 初始化验证 ==========
	if __name__ == "__main__":
	# 预加载模型（验证可用性）
	classifier, text_generator, tts = init_models()

	# 构建Gradio界面
	with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 800px}") as demo:
	gr.Markdown("# 🐦 鸟类知识小课堂（稳定版）")

	with gr.Row():
	img_input = gr.Image(type="pil", label="上传鸟类图片", height=300)
	audio_output = gr.Audio(label="语音讲解", autoplay=True)

	with gr.Column():
	name_output = gr.Textbox(label="识别结果")
	desc_output = gr.Textbox(label="趣味知识", lines=4)

	gr.Examples(
	examples=["test_images/eagle.jpg", "test_images/penguin.jpg"],
	inputs=img_input,
	label="示例图片"
	)

	img_input.change(
	process_image,
	inputs=img_input,
	outputs=[name_output, desc_output, audio_output]
	)

	demo.launch(
	server_name="0.0.0.0",
	server_port=7860,
	share=True
	)