Spaces:

AIgoose
/

mirofish-engine

Sleeping

App Files Files Community

mirofish-engine / backend /scripts /test_profile_format.py

666ghj

Enhance backend functionality with OASIS simulation features

c5b6f70 5 months ago

raw

history blame contribute delete

5.86 kB

	"""
	Check that generated profile files match the formats OASIS requires.

	Verifies:
	1. Twitter profiles are written in CSV format
	2. Reddit profiles are written in the detailed JSON format
	"""

	import os
	import sys
	import json
	import csv
	import tempfile

	# Put the parent of this script's directory at the front of sys.path so the
	# `app` package below can be imported when the script is run directly.
	sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

	from app.services.oasis_profile_generator import OasisProfileGenerator, OasisAgentProfile


	def test_profile_formats():
	    """Check the Twitter CSV and Reddit JSON files written by the generator.

	    Builds two sample ``OasisAgentProfile`` objects, writes them with
	    ``_save_twitter_csv`` and ``_save_reddit_json`` into a temporary
	    directory, reads both files back and prints a report of what was found.

	    Raises:
	        AssertionError: After the full report has been printed, if either
	            file contains no records or lacks a required field.
	    """
	    print("=" * 60)
	    print("OASIS Profile格式测试")
	    print("=" * 60)

	    # Sample profiles: the first sets every field, the second leaves out
	    # age / gender / mbti / country.
	    test_profiles = [
	        OasisAgentProfile(
	            user_id=0,
	            user_name="test_user_123",
	            name="Test User",
	            bio="A test user for validation",
	            persona="Test User is an enthusiastic participant in social discussions.",
	            karma=1500,
	            friend_count=100,
	            follower_count=200,
	            statuses_count=500,
	            age=25,
	            gender="male",
	            mbti="INTJ",
	            country="China",
	            profession="Student",
	            interested_topics=["Technology", "Education"],
	            source_entity_uuid="test-uuid-123",
	            source_entity_type="Student",
	        ),
	        OasisAgentProfile(
	            user_id=1,
	            user_name="org_official_456",
	            name="Official Organization",
	            bio="Official account for Organization",
	            persona="This is an official institutional account that communicates official positions.",
	            karma=5000,
	            friend_count=50,
	            follower_count=10000,
	            statuses_count=200,
	            profession="Organization",
	            interested_topics=["Public Policy", "Announcements"],
	            source_entity_uuid="test-uuid-456",
	            source_entity_type="University",
	        ),
	    ]

	    # Create the instance without running __init__; only the two save
	    # helpers are exercised here.
	    # NOTE(review): presumably this avoids setup the constructor needs - confirm.
	    generator = OasisProfileGenerator.__new__(OasisProfileGenerator)

	    # Problems are collected here so the whole report is printed before failing.
	    failures = []

	    # Work inside a temporary directory so nothing is left on disk.
	    with tempfile.TemporaryDirectory() as temp_dir:
	        twitter_path = os.path.join(temp_dir, "twitter_profiles.csv")
	        reddit_path = os.path.join(temp_dir, "reddit_profiles.json")

	        # --- Twitter CSV format ---
	        print("\n1. 测试Twitter Profile (CSV格式)")
	        print("-" * 40)
	        generator._save_twitter_csv(test_profiles, twitter_path)

	        # newline='' is what the csv module asks for, so that quoted fields
	        # containing line breaks are parsed correctly.
	        with open(twitter_path, 'r', encoding='utf-8', newline='') as f:
	            reader = csv.DictReader(f)
	            rows = list(reader)
	            # fieldnames is None for a completely empty file.
	            header = list(reader.fieldnames or [])

	        print(f" 文件: {twitter_path}")
	        print(f" 行数: {len(rows)}")
	        print(f" 表头: {header}")
	        if rows:
	            print("\n 示例数据 (第1行):")
	            for key, value in rows[0].items():
	                print(f" {key}: {value}")
	        else:
	            print("\n [错误] CSV文件中没有数据行")
	            failures.append("Twitter CSV 没有数据行")

	        # Required columns are checked against the header, which exists even
	        # when there are no data rows.
	        required_twitter_fields = ['user_id', 'user_name', 'name', 'bio',
	                                   'friend_count', 'follower_count', 'statuses_count', 'created_at']
	        missing = set(required_twitter_fields) - set(header)
	        if missing:
	            print(f"\n [错误] 缺少字段: {missing}")
	            failures.append(f"Twitter CSV 缺少字段: {sorted(missing)}")
	        else:
	            print("\n [通过] 所有必需字段都存在")

	        # --- Reddit JSON format ---
	        print("\n2. 测试Reddit Profile (JSON详细格式)")
	        print("-" * 40)
	        generator._save_reddit_json(test_profiles, reddit_path)

	        with open(reddit_path, 'r', encoding='utf-8') as f:
	            reddit_data = json.load(f)

	        print(f" 文件: {reddit_path}")
	        print(f" 条目数: {len(reddit_data)}")

	        required_reddit_fields = ['realname', 'username', 'bio', 'persona']
	        optional_reddit_fields = ['age', 'gender', 'mbti', 'country', 'profession', 'interested_topics']

	        if not reddit_data:
	            print("\n [错误] JSON文件中没有条目")
	            failures.append("Reddit JSON 没有条目")
	        else:
	            first_entry = reddit_data[0]
	            print(f" 字段: {list(first_entry.keys())}")
	            print("\n 示例数据 (第1条):")
	            print(json.dumps(first_entry, ensure_ascii=False, indent=4))

	            # Check every entry, not just the first: the second sample
	            # profile is the one built with fields left out.
	            missing = {
	                field
	                for entry in reddit_data
	                for field in required_reddit_fields
	                if field not in entry
	            }
	            if missing:
	                print(f"\n [错误] 缺少必需字段: {missing}")
	                failures.append(f"Reddit JSON 缺少必需字段: {sorted(missing)}")
	            else:
	                print("\n [通过] 所有必需字段都存在")

	            present_optional = set(optional_reddit_fields) & set(first_entry.keys())
	            print(f" [信息] 可选字段: {present_optional}")

	    print("\n" + "=" * 60)
	    print("测试完成!")
	    print("=" * 60)

	    # Fail only after the full report, so a broken format cannot pass silently.
	    if failures:
	        raise AssertionError("Profile格式验证失败: " + "; ".join(failures))


	def show_expected_formats():
	    """Print reference examples of the profile formats OASIS expects.

	    Writes a Twitter CSV sample and a Reddit JSON sample to stdout and
	    returns nothing.
	    """
	    banner = "=" * 60
	    divider = "-" * 40

	    print(f"\n{banner}")
	    print("OASIS 期望的Profile格式参考")
	    print(banner)

	    # Twitter: a header line followed by one line per user.
	    print("\n1. Twitter Profile (CSV格式)")
	    print(divider)
	    csv_sample = """user_id,user_name,name,bio,friend_count,follower_count,statuses_count,created_at
	0,user0,User Zero,I am user zero with interests in technology.,100,150,500,2023-01-01
	1,user1,User One,Tech enthusiast and coffee lover.,200,250,1000,2023-01-02"""
	    print(csv_sample)

	    # Reddit: a JSON list holding one detailed object per user.
	    print("\n2. Reddit Profile (JSON详细格式)")
	    print(divider)
	    sample_user = {
	        "realname": "James Miller",
	        "username": "millerhospitality",
	        "bio": "Passionate about hospitality & tourism.",
	        "persona": "James is a seasoned professional in the Hospitality & Tourism industry...",
	        "age": 40,
	        "gender": "male",
	        "mbti": "ESTJ",
	        "country": "UK",
	        "profession": "Hospitality & Tourism",
	        "interested_topics": ["Economics", "Business"],
	    }
	    print(json.dumps([sample_user], ensure_ascii=False, indent=2))


	if __name__ == "__main__":
	    # Print the reference formats even if the check raises: they are what a
	    # failing run needs to be compared against. Any exception still
	    # propagates afterwards.
	    try:
	        test_profile_formats()
	    finally:
	        show_expected_formats()