File size: 18,202 Bytes
1a93d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0701598
 
 
1a93d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b5571e
1a93d7a
5b5571e
 
 
 
 
 
1a93d7a
 
 
5b5571e
1a93d7a
5b5571e
 
1a93d7a
5b5571e
 
 
1a93d7a
5b5571e
 
 
 
 
 
 
 
 
 
 
 
 
1a93d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0701598
 
1a93d7a
0701598
1a93d7a
0701598
1a93d7a
0701598
1a93d7a
 
 
 
 
 
 
 
 
 
 
 
 
5b5571e
1a93d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0701598
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a93d7a
 
 
 
 
 
 
5b5571e
1a93d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0701598
1a93d7a
 
0701598
1a93d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0701598
1a93d7a
0701598
1a93d7a
 
0701598
1a93d7a
 
 
 
 
 
 
 
 
5b5571e
1a93d7a
0701598
1a93d7a
 
0701598
1a93d7a
0701598
1a93d7a
 
 
 
 
 
 
0701598
 
1a93d7a
 
 
 
 
 
0701598
1a93d7a
 
0701598
1a93d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0701598
 
 
1a93d7a
0701598
 
 
1a93d7a
0701598
1a93d7a
0701598
1a93d7a
 
 
 
 
 
 
 
 
 
5b5571e
1a93d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
"""数据源模块,负责从各种数据源获取用户配置信息。

此模块包含以下主要功能:
- 从本地文件获取职业数据
- 生成真实的地理位置信息
- 生成年龄信息
- 生成性别信息
"""

import json
import os
import random
from typing import Dict, List, Optional, Union, Any
from geonamescache import GeonamesCache
from config import get_completion, parse_gpt_response, parse_json_response, extract_json_from_markdown, parse_nested_json_response

_occupations_cache = None


def get_occupations() -> List[str]:
    """Return the occupation list stored in the local JSON data file.

    The file ``data/occupations_english.json`` is looked up under the parent
    directory of the directory containing this module. The parsed content is
    kept in the module-level ``_occupations_cache`` so later calls skip the
    file read.

    Returns:
        List[str]: The parsed JSON content (expected to be a list of
        occupation names). An empty list is returned when loading fails; the
        failure is printed and nothing is cached, so the next call retries.
    """
    global _occupations_cache

    if _occupations_cache is None:
        try:
            # The data folder lives one level above this module's directory.
            module_dir = os.path.dirname(__file__)
            root_dir = os.path.abspath(os.path.join(module_dir, '..'))
            json_path = os.path.join(root_dir, 'data', 'occupations_english.json')
            with open(json_path, 'r', encoding='utf-8') as handle:
                _occupations_cache = json.load(handle)
        except Exception as e:
            print(f"Error loading occupations data: {e}")
            return []

    return _occupations_cache

# Inclusive upper age bound of each group, in ascending order. Any age above
# the last bound is classified as "senior".
_AGE_GROUP_UPPER_BOUNDS = (
    (6, "toddler"),
    (12, "child"),
    (19, "adolescent"),
    (29, "young_adult"),
    (45, "adult"),
    (65, "middle_aged"),
)


def _age_group_for(age: int) -> str:
    """Return the age-group label whose range contains ``age``."""
    for upper_bound, label in _AGE_GROUP_UPPER_BOUNDS:
        if age <= upper_bound:
            return label
    return "senior"


def generate_age_info(min_age: int = 7, max_age: int = 85) -> Dict[str, Union[int, str]]:
    """Draw a random age and classify it into an age group.

    The age is drawn uniformly from ``[min_age, max_age]`` (inclusive). With
    the default bounds of 7 and 85 the "toddler" group (age <= 6) is never
    produced; pass a lower ``min_age`` to make it reachable.

    Age groups: toddler (<= 6), child (7-12), adolescent (13-19),
    young_adult (20-29), adult (30-45), middle_aged (46-65), senior (> 65).

    Args:
        min_age: Smallest age that may be generated (inclusive).
        max_age: Largest age that may be generated (inclusive).

    Returns:
        Dict[str, Union[int, str]]: A dictionary with:
            - age: the generated age (integer)
            - age_group: the label of the matching age group

    Raises:
        ValueError: If ``min_age`` is greater than ``max_age`` (raised by
            ``random.randint``).
    """
    age = random.randint(min_age, max_age)
    return {
        "age": age,
        "age_group": _age_group_for(age),
    }

def generate_career_info(age: int) -> Dict[str, str]:
    """Produce an occupation/status entry appropriate for the given age.

    Two strategies are used:
    - Ages below 18 or above 65: the status is requested from GPT through
      ``get_completion`` with an age-specific hint.
    - All other ages: an occupation is picked at random from the list
      returned by ``get_occupations``.

    Args:
        age: Age of the person.

    Returns:
        Dict[str, str]: ``{"status": ...}``. The value is an empty string when
        GPT returns nothing or when no occupation data is available.
    """
    needs_generated_status = age < 18 or age > 65

    if not needs_generated_status:
        # Working-age people get a random entry from the occupation database.
        pool = get_occupations()
        if pool:
            return {"status": random.choice(pool)}
        return {"status": ""}

    # Minors and people over 65 get a GPT-generated status.
    request = f"Generate an appropriate occupation or status for a {age} year old person. "
    if age < 18:
        hint = "Consider that the individuals are likely in school or engaged in youth activities. They may not have any formal occupation. If appropriate, you can mention their student status or indicate they have no occupation yet. Only in some cases, consider potential interest in early employment opportunities, internships, or non-traditional educational paths."
    else:  # age > 65
        hint = "Consider they might be retired but could still be active in various ways."
    request += hint

    system_message = {
        "role": "system",
        "content": "You are an AI that generates realistic occupation statuses based on age. Respond with just the status, no explanation.",
    }
    user_message = {"role": "user", "content": request}

    generated = get_completion([system_message, user_message])
    return {"status": generated if generated else ""}

def generate_location() -> Dict[str, str]:
    """Pick a random country and one of its cities from the GeoNames cache.

    A country code is chosen at random first; the city is then chosen at
    random among the cached cities whose ``countrycode`` matches it.

    Returns:
        Dict[str, str]: A dictionary with:
            - country: name of the chosen country
            - city: name of the chosen city, or "Unknown City" when the cache
              holds no city for that country
    """
    geo = GeonamesCache()

    # Choose the country.
    country_table = geo.get_countries()
    picked_code = random.choice(list(country_table.keys()))
    picked_country = country_table[picked_code]

    # Collect every cached city that belongs to the chosen country.
    candidates = []
    for entry in geo.get_cities().values():
        if entry['countrycode'] == picked_code:
            candidates.append(entry)

    if candidates:
        city_name = random.choice(candidates)['name']
    else:
        city_name = "Unknown City"

    return {
        "country": picked_country['name'],
        "city": city_name
    }



def generate_gender() -> str:
    """Return 'male' or 'female', chosen at random with equal probability."""
    options = ('male', 'female')
    return random.choice(options)


def generate_personal_values(age: int, gender: str, occupation: str, location: Dict[str, str]) -> Dict[str, str]:
    """Ask GPT for a person's core values based on demographic information.

    A value-system type is first drawn at random from positive, negative and
    neutral. That type is embedded in the prompt and GPT is asked to return
    the concrete description as a JSON object.

    Args:
        age: Age of the person.
        gender: Gender of the person.
        occupation: Occupation of the person.
        location: Dictionary with the keys 'city' and 'country'.

    Returns:
        Dictionary with the single key 'values_orientation'. When the parsed
        response has no usable value, the stripped raw response is used.

    Raises:
        Exception: Any error raised while calling GPT or parsing its response
            is printed and re-raised unchanged.
    """
    # Draw the kind of value system the description should reflect.
    value_type = random.choice(('positive', 'negative', 'neutral'))

    # The prompt text is assembled from adjacent literals, one per line.
    prompt = (
        "\n"
        "    Generate a concise description of a person's core values and belief system based on:\n"
        f"    Age: {age}, Gender: {gender}, Occupation: {occupation}, Location: {location['city']}, {location['country']}\n"
        "\n"
        f"    IMPORTANT: This person has a {value_type.upper()} value system. Their values may be entirely consistent with their personal background or may conflict with it. Avoid introducing unnecessary contrasts or contradictions in their beliefs. Try to avoid being related to the community as much as possible. Avoid using words with similar meanings to ‘balance’ and ‘balance’.\n"
        "\n"
        "    Please generate a short phrase that clearly captures the essence of this person's core values and beliefs without adding conflicting ideas or turnarounds.\n"
        "\n"
        "    CRITICAL: You must format your response EXACTLY as a valid JSON object with this structure:\n"
        "    {\n"
        '        "values_orientation": "short phrase describing their values"\n'
        "    }\n"
        "\n"
        "    DO NOT include any text before or after the JSON. The response must be parseable by json.loads().\n"
        "    "
    )

    system_message = {
        "role": "system",
        "content": "You are an assistant that generates realistic human value systems in ONE SENTENCE, including both positive and negative values. You ALWAYS respond with valid JSON objects that can be parsed by json.loads().",
    }
    user_message = {"role": "user", "content": prompt}

    try:
        response = get_completion([system_message, user_message], temperature=1.0)

        # Parse with the shared helper imported from config.
        result = parse_gpt_response(
            response,
            expected_fields=["values_orientation"],
            field_defaults={"values_orientation": ""}
        )

        orientation = result.get("values_orientation")
        if not orientation:
            # Fall back to the raw text when no structured value was extracted.
            orientation = response.strip()
        return {"values_orientation": orientation}
    except Exception as e:
        print(f"\nError in generate_personal_values: {e}")
        raise




def generate_life_attitude(age: int = None, gender: str = None, occupation: str = None,
                        location: Dict[str, str] = None, values_orientation: str = None) -> Dict[str, Union[str, Dict, bool]]:
    """Ask GPT for a person's life attitude from demographics and core values.

    The prompt allows the attitude to be positive, neutral or negative and
    requests three single-sentence attributes as a JSON object.

    Args:
        age: Age of the person.
        gender: Gender of the person.
        occupation: Occupation of the person.
        location: Dictionary with the keys 'city' and 'country'. Although the
            default is None, the prompt indexes it, so a dictionary is
            required in practice.
        values_orientation: Description of the person's core values.

    Returns:
        Dict: A dictionary with:
            - attitude: the person's overall life attitude
            - attitude_details: how that attitude shows up in daily life
            - coping_mechanism: how the person deals with challenges

    Raises:
        ValueError: If any of the three fields is empty in the parsed response.
        Exception: Any other error from the GPT call or parsing is printed and
            re-raised unchanged.
    """
    required_fields = ("attitude", "attitude_details", "coping_mechanism")

    # The prompt text is assembled from adjacent literals, one per line.
    prompt = (
        "\n"
        "    Generate specific attributes about a person's life attitude based on the following information:\n"
        "    \n"
        f"    Age: {age}\n"
        f"    Gender: {gender}\n"
        f"    Occupation: {occupation}\n"
        f"    Location: {location['city']}, {location['country']}\n"
        f"    Core Values: {values_orientation}\n"
        "    \n"
        "    IMPORTANT: This person's attitude toward life can be positive, neutral, or negative. In a negative state, they may hold a pessimistic, cynical, or even nihilistic view of life. Avoid involving concepts such as community or balance. Avoid using words with similar meanings to ‘balance’ and ‘balance’.\n"
        "    \n"
        "    I need you to generate ONLY the following specific attributes, each expressed as a single sentence:\n"
        "    \n"
        "    1. attitude: A single, concise sentence (5-10 words) describing their overall life attitude\n"
        "    2. attitude_details: A single sentence (15-20 words) explaining how this attitude manifests in their daily life\n"
        "    3. coping_mechanism: A single sentence (5-10 words) describing how they deal with challenges\n"
        "    \n"
        "    CRITICAL: You must format your response EXACTLY as a valid JSON object with this structure:\n"
        '    {"attitude": "single sentence", "attitude_details": "single sentence", "coping_mechanism": "single sentence"}\n'
        "    \n"
        "    DO NOT include any text before or after the JSON. The response must be parseable by json.loads().\n"
        "    "
    )

    system_message = {
        "role": "system",
        "content": "You are an assistant that generates realistic human life attitudes in ONE SENTENCE, including positive, neutral, and negative outlooks. You ALWAYS respond with valid JSON objects that can be parsed by json.loads().",
    }
    user_message = {"role": "user", "content": prompt}

    try:
        response = get_completion([system_message, user_message], temperature=1.0)

        # Parse with the shared helper imported from config.
        result = parse_gpt_response(
            response,
            expected_fields=list(required_fields),
            field_defaults={name: "" for name in required_fields}
        )

        # Reject the response as soon as one required field is empty.
        for name in required_fields:
            if not result[name]:
                raise ValueError(f"Missing required field: {name}")

        return {name: result[name] for name in required_fields}
    except Exception as e:
        print(f"\nError in generate_life_attitude: {e}")
        raise

def generate_personal_story(age: int, gender: str, occupation: str, location: Dict[str, str],
                                    values_orientation: str, life_attitude: Dict[str, str]) -> Dict[str, str]:
    """Ask GPT for one to three short personal stories about a person.

    The number of stories is drawn at random (1-3). The prompt allows
    positive and negative experiences alike. All returned stories are merged
    into a single text, each prefixed with "Story N: " and separated by a
    blank line.

    Args:
        age: Age of the person.
        gender: Gender of the person.
        occupation: Occupation of the person.
        location: Dictionary with the keys 'city' and 'country'.
        values_orientation: Description of the person's core values.
        life_attitude: Life-attitude dictionary; the keys "attitude" and
            "attitude_category" are read, defaulting to "" and "neutral".

    Returns:
        Dict: A dictionary with:
            - personal_story: the combined story text

    Raises:
        ValueError: If the parsed response contains no stories.
        Exception: Any other error from the GPT call or parsing is printed and
            re-raised unchanged.
    """
    # Pull the attitude summary that is echoed in the prompt.
    outlook = life_attitude.get("attitude", "")
    # NOTE(review): no function visible in this file sets "attitude_category",
    # so this presumably resolves to "neutral" unless the caller adds the key.
    outlook_category = life_attitude.get("attitude_category", "neutral")

    # Decide at random how many stories to request.
    story_count = random.randint(1, 3)

    # The prompt text is assembled from adjacent literals, one per line.
    prompt = (
        "\n"
        f"    Generate {story_count} concise personal stories for a person with the following characteristics:\n"
        "    \n"
        f"    Age: {age}\n"
        f"    Gender: {gender}\n"
        f"    Occupation: {occupation}\n"
        f"    Location: {location['city']}, {location['country']}\n"
        f"    Core Values: {values_orientation}\n"
        f"    Life Attitude: {outlook} ({outlook_category})\n"
        "    \n"
        "    IMPORTANT: The story can be positive, negative, or a mix of both. Please do not avoid including life experiences that may be controversial or have negative consequences. The narration should be as specific as possible, objective, and free from any subjective comments or value judgments. The stories that unfold should be closely related to their country and region, reflecting events that could genuinely happen to the people there. This could be a random event unrelated to the background, or a significant turning point in their life. Please avoid including anything related to community building.\n"
        "    \n"
        f"    Please provide {story_count} brief personal stories (each 150-200 words).\n"
        "    \n"
        "    CRITICAL: You must format your response EXACTLY as a valid JSON object with this structure:\n"
        '    {"personal_stories": ["story 1", "story 2", ...]}\n'
        "    \n"
        "    DO NOT include any text before or after the JSON. The response must be parseable by json.loads().\n"
        "    "
    )

    system_message = {
        "role": "system",
        "content": "You are an assistant that generates concise realistic personal stories, including both positive and negative life experiences. You ALWAYS respond with valid JSON objects that can be parsed by json.loads().",
    }
    user_message = {"role": "user", "content": prompt}

    try:
        response = get_completion([system_message, user_message], temperature=1.0)

        # Only the list of stories is extracted from the JSON response.
        result = parse_gpt_response(
            response,
            expected_fields=["personal_stories"],
            field_defaults={
                "personal_stories": []
            }
        )

        stories = result["personal_stories"]
        if not stories:
            raise ValueError("Failed to generate personal stories")

        # Merge the stories into one text, numbered from 1.
        numbered = [f"Story {position}: {text}" for position, text in enumerate(stories, start=1)]
        return {"personal_story": "\n\n".join(numbered)}
    except Exception as e:
        print(f"\nError in generate_personal_story: {e}")
        raise

def generate_interests_and_hobbies(personal_story: Dict[str, Any]) -> Dict[str, Any]:
    """Ask GPT to infer a person's interests and hobbies from their story.

    Only the "personal_story" text of the input is sent to GPT. The prompt
    asks for interests that may be positive or negative habits and requests
    a JSON object listing 2 of them. The number of interests received is
    printed before returning.

    Args:
        personal_story: Non-empty dictionary holding the story text under the
            key "personal_story" (an empty string is used when it is missing).

    Returns:
        Dict: A dictionary with:
            - interests: the list of interests parsed from the response

    Raises:
        ValueError: If ``personal_story`` is empty or not a dictionary.
        Exception: Any error from the GPT call or parsing is printed and
            re-raised unchanged.
    """
    # A non-empty dictionary is required to build the prompt.
    if not (personal_story and isinstance(personal_story, dict)):
        raise ValueError("必须提供个人故事数据才能生成兴趣爱好")

    story_text = personal_story.get("personal_story", "")

    # The prompt text is assembled from adjacent literals, one per line; the
    # uneven indentation inside the text is kept exactly as it is sent.
    prompt = (
        "\n"
        "   Based on the following personal story and key life events, infer two to three hobbies or interests this person might use to relax. These activities can be positive or negative and may include non-traditional, controversial, or unexpected ones, such as various sports, traveling, or even smoking, drinking, or using marijuana. Please make inferences about the person’s possible interests based on the story, rather than simply extracting them directly from the story. \n"
        "   \n"
        f"     Personal Story: {story_text}\n"
        "\n"
        "     IMPORTANT: Avoid including anything related to community-building activities.\n"
        "     \n"
        "    Please extract 2 hobbies or interests based on these reflections and format your response as a JSON object:\n"
        "\n"
        "     {\n"
        '        "interests": ["interest1", "interest2"]\n'
        "     }\n"
        "\n"
        "    DO NOT include any text before or after the JSON. The response must be parseable by json.loads().\n"
        "    "
    )

    system_message = {
        "role": "system",
        "content": "You are an assistant that extracts realistic interests and hobbies from a person's life story, including both positive activities and negative habits. You ALWAYS respond with valid JSON objects that can be parsed by json.loads().",
    }
    user_message = {"role": "user", "content": prompt}

    try:
        response = get_completion([system_message, user_message], temperature=0.9)

        # Parse with the shared helper from config.
        from config import parse_gpt_response
        result = parse_gpt_response(
            response,
            expected_fields=["interests"],
            field_defaults={"interests": []}
        )

        interests = result.get("interests", [])
    except Exception as e:
        # Report the failure, then let the caller handle it.
        print(f"\nError in generate_interests_and_hobbies: {e}")
        raise
    else:
        # Report how many interests were produced.
        print(f"\nInfo: 生成的兴趣爱好数量: {len(interests)}")
        return {"interests": interests}