jonghhhh commited on
Commit
62b1578
ยท
0 Parent(s):

Initial commit: NBS Persona Survey System

Browse files
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ *.parquet filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.so
5
+ .env
6
+ .venv
7
+ venv/
8
+ ENV/
9
+ .DS_Store
10
+ *.log
11
+ survey_results_*.json
12
+ ์ „๊ตญ์ง€ํ‘œ์กฐ์‚ฌ_์›๋ณธ/
13
+ ์ „๊ตญ์ง€ํ‘œ์กฐ์‚ฌ_json/
14
+ *.md~
15
+ .ipynb_checkpoints/
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # ์‹œ์Šคํ…œ ํŒจํ‚ค์ง€ ์„ค์น˜
6
+ RUN apt-get update && apt-get install -y \
7
+ build-essential \
8
+ && rm -rf /var/lib/apt/lists/*
9
+
10
+ # Python ํŒจํ‚ค์ง€ ์„ค์น˜
11
+ COPY requirements.txt .
12
+ RUN pip install --no-cache-dir -r requirements.txt
13
+
14
+ # ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ํŒŒ์ผ ๋ณต์‚ฌ
15
+ COPY app.py .
16
+ COPY rag_engine.py .
17
+ COPY avatar_synthetic.py .
18
+ COPY index.html .
19
+ COPY consolidated_nbs_data.parquet .
20
+ COPY nbs_questions_index.parquet .
21
+
22
+ # ํฌํŠธ ๋…ธ์ถœ
23
+ EXPOSE 7860
24
+
25
+ # FastAPI ์‹คํ–‰
26
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: NBS Persona Survey
3
+ emoji: 🎭
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ # NBS 페르소나 설문조사 시스템
11
+
12
+ 전국지표조사(NBS) 데이터 기반 페르소나 아바타 설문 시뮬레이션 API
13
+
14
+ ## 기능
15
+
16
+ - **실제 응답자 모드**: 16만 건의 실제 응답자 데이터 기반 시뮬레이션
17
+ - **통계 기반 모드**: 특정 그룹의 통계적 경향성을 반영한 시뮬레이션
18
+ - **RAG 기반 응답 생성**: 과거 응답 이력을 참조하여 일관성 있는 답변 생성
19
+
20
+ ## 데이터
21
+
22
+ - 총 응답자: 166,721명
23
+ - 설문 회차: 163회 (2020년~현재)
24
+ - 고유 질문: 1,219개
25
+
26
+ ## API 사용법
27
+
28
+ ### Health Check
29
+ ```bash
30
+ GET /health
31
+ ```
32
+
33
+ ### 실제 응답자 시뮬레이션
34
+ ```bash
35
+ POST /simulate/actual
36
+ {
37
+ "question": "์ •๋…„์—ฐ์žฅ์— ๋Œ€ํ•ด ์–ด๋–ป๊ฒŒ ์ƒ๊ฐํ•˜์‹ญ๋‹ˆ๊นŒ?",
38
+ "gender": "๋‚จ์ž",
39
+ "age": "31~40",
40
+ "region": "์„œ์šธ",
41
+ "job": "์‚ฌ๋ฌด/๊ธฐ์ˆ ์ง",
42
+ "sample": 5
43
+ }
44
+ ```
45
+
46
+ ### 통계 기반 시뮬레이션
47
+ ```bash
48
+ POST /simulate/synthetic
49
+ {
50
+ "question": "๊ธฐ๋ณธ์†Œ๋“์ œ ๋„์ž…์— ๋Œ€ํ•œ ์˜๊ฒฌ์€?",
51
+ "gender": "์—ฌ์ž",
52
+ "age": "20~29",
53
+ "region": "๊ฒฝ๊ธฐ",
54
+ "sample": 3
55
+ }
56
+ ```
57
+
58
+ ## 기술 스택
59
+
60
+ - FastAPI
61
+ - Sentence Transformers (KR-SBERT)
62
+ - Google Gemini 2.5 Flash
63
+ - Pandas + PyArrow (Parquet)
64
+
65
+ ## 라이선스
66
+
67
+ 데이터는 전국지표조사(NBS) 원본 데이터를 기반으로 합니다.
app.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ from fastapi import FastAPI, HTTPException, Header
4
+ from fastapi.responses import FileResponse
5
+ from fastapi.staticfiles import StaticFiles
6
+ from pydantic import BaseModel
7
+ from typing import Optional, List
8
+ from rag_engine import NBSRagEngine
9
+ import pandas as pd
10
+ import json
11
+
12
+ # Import helpers from existing scripts if possible, or redefine for standalone stability
13
+ from avatar_synthetic import get_statistical_context
14
+
15
+ app = FastAPI(title="NBS Persona Survey API", description="์ „๊ตญ์ง€ํ‘œ์กฐ์‚ฌ(NBS) ๊ธฐ๋ฐ˜ ํŽ˜๋ฅด์†Œ๋‚˜ ์•„๋ฐ”ํƒ€ ์„ค๋ฌธ ์‹œ๋ฎฌ๋ ˆ์ด์…˜ API")
16
+
17
+ # Initialize Engine (Singleton-like)
18
+ PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
19
+ PARQUET_PATH = os.path.join(PROJECT_DIR, "consolidated_nbs_data.parquet")
20
+ INDEX_PATH = os.path.join(PROJECT_DIR, "nbs_questions_index.parquet")
21
+ MODEL_NAME = "snunlp/KR-SBERT-V40K-klueNLI-augSTS"
22
+
23
+ engine = NBSRagEngine(PARQUET_PATH, INDEX_PATH, MODEL_NAME)
24
+
25
@app.get("/")
async def get_index():
    """Serve the front-end page (index.html) located in PROJECT_DIR."""
    index_path = os.path.join(PROJECT_DIR, "index.html")
    return FileResponse(index_path)
28
+
29
class SurveyRequest(BaseModel):
    # Request body accepted by both POST /simulate/actual and
    # POST /simulate/synthetic.

    # Survey question put to the simulated respondents (required).
    question: str
    # Optional demographic filters; each is passed straight through to
    # engine.filter_respondents by the endpoints.
    gender: Optional[str] = None
    age: Optional[str] = None
    region: Optional[str] = None
    job: Optional[str] = None
    # Requested number of responses. /simulate/actual caps this at 10;
    # /simulate/synthetic uses it as given.
    sample: int = 5
36
+
37
@app.post("/simulate/actual")
async def simulate_actual(req: SurveyRequest, x_api_key: Optional[str] = Header(None)):
    """Simulate answers from sampled real respondents (Actual Mode).

    Filters the respondent table by the requested demographics, samples up to
    ``min(req.sample, 10)`` rows and asks the engine to generate one answer
    per sampled respondent.

    Args:
        req: Question, optional demographic filters and requested sample size.
        x_api_key: Value of the ``X-API-Key`` request header; forwarded to
            ``engine.generate_response`` as ``api_key``.

    Returns:
        A list with one dict per sampled respondent (id, survey round,
        demographics, referenced context chunks, generated response).

    Raises:
        HTTPException: 404 when no respondent matches the filters, 500 for
            any unexpected failure.
    """
    try:
        # Restriction for simulation speed and stability: at most 10
        # generated responses per request.
        effective_sample = min(req.sample, 10)

        # 1. Filter candidates
        candidates = engine.filter_respondents(
            gender=req.gender,
            age=req.age,
            region=req.region,
            job=req.job,
        )

        if len(candidates) == 0:
            raise HTTPException(status_code=404, detail="์ง€์ •ํ•œ ์กฐ๊ฑด์— ๋งž๋Š” ์‘๋‹ต์ž๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค. ์กฐ๊ฑด์„ ์™„ํ™”ํ•ด ์ฃผ์„ธ์š”.")

        # 2. Sample (never more rows than are available)
        sample_size = min(len(candidates), effective_sample)
        sample = candidates.sample(n=sample_size)

        # 3. Predict
        sim_qs = engine.find_similar_questions(req.question, top_k=5)
        persona_desc = f"{req.region or '์ „๊ตญ'} ๊ฑฐ์ฃผ, {req.age or '์ „์—ฐ๋ น'}, {req.gender or '์„ฑ๋ณ„๋ฌด๊ด€'}, ์ง์—…: {req.job or '์ง์—…๋ฌด๊ด€'}"

        results = []
        for idx, row in sample.iterrows():
            context = engine.get_context_for_responder(row, sim_qs)
            response = engine.generate_response(persona_desc, context, req.question, api_key=x_api_key)

            # Guard against missing values the same way `age` is guarded:
            # int(NaN) raises ValueError.
            survey_round = row.get('survey_round', 0)
            age = row.get('age')
            results.append({
                "respondent_id": str(row.get('id', idx)),
                "survey_round": int(survey_round) if pd.notna(survey_round) else 0,
                "demographics": {
                    "gender": row.get('gender'),
                    "region": row.get('region'),
                    "age": int(age) if pd.notna(age) else None,
                    "job": row.get('job'),
                },
                "referenced_context": context.split('\n\n'),
                "response": response,
            })
        return results
    except HTTPException:
        # Deliberate HTTP errors (such as the 404 above) must keep their
        # status code instead of being re-wrapped as a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
82
+
83
@app.post("/simulate/synthetic")
async def simulate_synthetic(req: SurveyRequest, x_api_key: Optional[str] = Header(None)):
    """Simulate answers from a group's aggregate statistics (Synthetic Mode).

    Builds a statistical summary of the respondents matching the requested
    demographics and asks the engine for ``req.sample`` answers based on that
    summary.

    Args:
        req: Question, optional demographic filters and number of answers.
        x_api_key: Value of the ``X-API-Key`` request header; forwarded to
            ``engine.generate_response`` as ``api_key``.

    Returns:
        A list with one dict per generated avatar (avatar id, requested
        demographics, referenced statistics chunks, generated response).

    Raises:
        HTTPException: 404 when no respondent matches the filters, 500 for
            any unexpected failure.
    """
    try:
        # 1. Get statistical base
        candidates = engine.filter_respondents(
            gender=req.gender,
            age=req.age,
            region=req.region,
            job=req.job,
        )

        if len(candidates) == 0:
            raise HTTPException(status_code=404, detail="์ง€์ •ํ•œ ์กฐ๊ฑด์— ๋งž๋Š” ์‘๋‹ต์ž๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")

        # 2. Extract stats
        sim_qs = engine.find_similar_questions(req.question, top_k=5)
        stat_context = get_statistical_context(candidates, sim_qs)
        persona_desc = f"{req.region or '์ „๊ตญ'} ๊ฑฐ์ฃผ, {req.age or '์ „์—ฐ๋ น๋Œ€'}, {req.gender or '์„ฑ๋ณ„๋ฌด๊ด€'}, ์ง์—…: {req.job or '์ง์—…๋ฌด๊ด€'} ๊ทธ๋ฃน์˜ ํ†ต๊ณ„์  ํ‰๊ท  ๋ชจ๋ธ"

        # NOTE(review): unlike /simulate/actual, req.sample is not capped
        # here, so one request can trigger an arbitrary number of
        # generate_response calls - confirm whether a limit is wanted.
        results = []
        for i in range(req.sample):
            response = engine.generate_response(persona_desc, stat_context, req.question, api_key=x_api_key)
            results.append({
                "avatar_id": f"syn_{i}",
                "demographics": {
                    "gender": req.gender,
                    "age": req.age,
                    "region": req.region,
                    "job": req.job,
                },
                "referenced_stat_context": stat_context.split('\n\n'),
                "response": response,
            })
        return results
    except HTTPException:
        # Deliberate HTTP errors (such as the 404 above) must keep their
        # status code instead of being re-wrapped as a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
120
+
121
@app.get("/health")
async def health_check():
    """Report a static "healthy" status plus the row count of engine.df."""
    record_count = len(engine.df)
    return {"status": "healthy", "total_records": record_count}
124
+
125
+ if __name__ == "__main__":
126
+ import uvicorn
127
+ uvicorn.run(app, host="0.0.0.0", port=8000)
avatar_synthetic.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import pandas as pd
4
+ from rag_engine import NBSRagEngine
5
+ from tqdm import tqdm
6
+ import argparse
7
+ import json
8
+
9
def get_statistical_context(candidates, sim_qs):
    """Summarise how a respondent group answered a set of questions.

    The summary covers (a) up to five "persona" columns - columns whose name
    contains one of the persona keywords, preferring the most populated ones -
    followed by (b) the questions in ``sim_qs``, in order and without
    duplicates. For each of those that exists as a column, the five most
    frequent answers and their relative frequencies are listed.

    Args:
        candidates: pandas DataFrame of respondents, one column per question.
        sim_qs: Iterable of question (column) names to include.

    Returns:
        One line per summarised question, joined by blank lines ("\\n\\n").
        Questions that are not columns of ``candidates`` or that have no
        non-null answers are omitted; the result is "" when nothing remains.
    """
    # 1. Broad Persona Statistics
    persona_keywords = ['์ •์น˜', '์„ฑํ–ฅ', '์ด๋…', '์ง€์ง€', 'ํ›„๋ณด', '๊ฒฝ์ œ', '๋ถ€๋™์‚ฐ']
    columns = candidates.columns
    # Non-string labels cannot contain a keyword; skipping them also avoids
    # a TypeError from the substring test.
    persona_cols = [
        c for c in columns
        if isinstance(c, str) and any(k in c for k in persona_keywords)
    ]

    # Pick top 5 most 'filled' persona columns to avoid noise
    persona_cols = sorted(persona_cols, key=lambda c: candidates[c].count(), reverse=True)[:5]

    all_qs = list(persona_cols)
    for q in sim_qs:
        if q not in all_qs:
            all_qs.append(q)

    context_lines = []
    for q in all_qs:
        if q not in columns:
            continue
        # Relative frequency of the five most common answers.
        dist = candidates[q].value_counts(normalize=True).head(5)
        if dist.empty:
            # No non-null answers in this group: a line without any
            # statistics would only add noise to the prompt.
            continue
        dist_str = ", ".join(f"'{ans}' ({pct:.1%})" for ans, pct in dist.items())
        context_lines.append(f"๊ณผ๊ฑฐ ๋ฐ์ดํ„ฐ ํ†ต๊ณ„ ({q}): {dist_str}")
    return "\n\n".join(context_lines)
31
+
32
def run_synthetic_avatar_survey(target_question, demographics, top_k_context=5, num_avatars=1000):
    """Generate synthetic survey answers for a demographic group.

    Loads the NBS data and question index that sit next to this file, filters
    respondents by ``demographics``, summarises the group's past answers to
    questions similar to ``target_question`` and asks the engine for
    ``num_avatars`` answers based on that summary.

    Args:
        target_question: Question to put to the synthetic avatars.
        demographics: Dict with optional 'gender', 'age', 'region' and 'job'
            entries; missing or None entries are described with a generic
            placeholder in the persona text.
        top_k_context: Number of similar past questions to look up.
        num_avatars: Number of answers to generate.

    Returns:
        A list of dicts (avatar id, demographics, referenced statistics
        chunks, response), or an empty list when no respondent matches.
    """
    project_dir = os.path.dirname(os.path.abspath(__file__))
    parquet = os.path.join(project_dir, "consolidated_nbs_data.parquet")
    db = os.path.join(project_dir, "nbs_questions_index.parquet")
    model = "snunlp/KR-SBERT-V40K-klueNLI-augSTS"

    engine = NBSRagEngine(parquet, db, model)

    # 1. Get statistical base from candidates
    candidates = engine.filter_respondents(
        gender=demographics.get('gender'),
        age=demographics.get('age'),
        region=demographics.get('region'),
        job=demographics.get('job'),
    )

    if len(candidates) == 0:
        print("No candidates found for these demographics.")
        return []

    # 2. Find context questions
    sim_qs = engine.find_similar_questions(target_question, top_k=top_k_context)
    stat_context = get_statistical_context(candidates, sim_qs)

    # Use `or` rather than dict.get's default: the CLI always supplies every
    # key (with None when the flag is omitted), so a .get default would never
    # apply and the persona text would read "None".
    region = demographics.get('region') or '์ „๊ตญ'
    age = demographics.get('age') or '์ „์—ฐ๋ น๋Œ€'
    gender = demographics.get('gender') or '์„ฑ๋ณ„๋ฌด๊ด€'
    job = demographics.get('job') or '์ง์—…๋ฌด๊ด€'
    persona_desc = f"{region} ๊ฑฐ์ฃผ, {age}, {gender}, ์ง์—…: {job} ๊ทธ๋ฃน์˜ ํ†ต๊ณ„์  ํ‰๊ท  ๋ชจ๋ธ"

    print(f"Generating {num_avatars} synthetic responses based on group statistics...")
    results = []

    for i in tqdm(range(num_avatars)):
        response = engine.generate_response(persona_desc, stat_context, target_question)
        results.append({
            "avatar_id": f"syn_{i}",
            "demographics": demographics,
            "referenced_stat_context": stat_context.split('\n\n'),
            "response": response,
        })

    return results
71
+
72
if __name__ == "__main__":
    # Command-line entry point: run one synthetic survey and dump the
    # results as JSON next to this script.
    cli = argparse.ArgumentParser()
    cli.add_argument("--question", type=str, required=True)
    cli.add_argument("--gender", type=str, default=None)
    cli.add_argument("--age", type=str, default=None, help="๋‚˜์ด ๋˜๋Š” ๋‚˜์ด ๋ฒ”์œ„ (์˜ˆ: 20 ๋˜๋Š” 21~29)")
    cli.add_argument("--region", type=str, default=None)
    cli.add_argument("--job", type=str, default=None)
    cli.add_argument("--sample", type=int, default=10, help="Number of synthetic avatars to generate")

    args = cli.parse_args()

    # Demographic filters in the dict shape run_synthetic_avatar_survey expects.
    demos = {field: getattr(args, field) for field in ('gender', 'age', 'region', 'job')}

    survey_results = run_synthetic_avatar_survey(args.question, demos, num_avatars=args.sample)

    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_f = os.path.join(script_dir, "survey_results_synthetic.json")
    with open(output_f, "w", encoding="utf-8") as out_file:
        json.dump(survey_results, out_file, ensure_ascii=False, indent=4)
    print(f"Synthetic simulation finished. Results saved to {output_f}")
consolidated_nbs_data.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ae6e667dc62aa831baa866b4de965a75bc3bca6468a4a32d0b669a2e1d52322
3
+ size 6986110
index.html ADDED
@@ -0,0 +1,736 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="ko">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Persona Issue Survey(Beta version) - NBS Avatar System</title>
8
+ <link href="https://fonts.googleapis.com/css2?family=Pretendard:wght@400;500;700&display=swap" rel="stylesheet">
9
+ <style>
10
+ :root {
11
+ --primary: #6366f1;
12
+ --primary-dark: #4f46e5;
13
+ --secondary: #ec4899;
14
+ --bg: #f8fafc;
15
+ --sidebar-bg: #0f172a;
16
+ --card-bg: #ffffff;
17
+ --text-main: #1e293b;
18
+ --text-muted: #64748b;
19
+ --glass: rgba(255, 255, 255, 0.7);
20
+ }
21
+
22
+ * {
23
+ margin: 0;
24
+ padding: 0;
25
+ box-sizing: border-box;
26
+ font-family: 'Pretendard', sans-serif;
27
+ }
28
+
29
+ body {
30
+ background-color: var(--bg);
31
+ color: var(--text-main);
32
+ display: flex;
33
+ min-height: 100vh;
34
+ }
35
+
36
+ /* Sidebar Styling */
37
+ .sidebar {
38
+ width: 320px;
39
+ background-color: var(--sidebar-bg);
40
+ color: white;
41
+ padding: 2.5rem 1.5rem;
42
+ display: flex;
43
+ flex-direction: column;
44
+ position: fixed;
45
+ height: 100vh;
46
+ left: 0;
47
+ top: 0;
48
+ box-shadow: 4px 0 20px rgba(0, 0, 0, 0.1);
49
+ z-index: 100;
50
+ }
51
+
52
+ .logo {
53
+ font-size: 1.5rem;
54
+ font-weight: 700;
55
+ margin-bottom: 3rem;
56
+ background: linear-gradient(to right, #818cf8, #f472b6);
57
+ -webkit-background-clip: text;
58
+ -webkit-text-fill-color: transparent;
59
+ display: flex;
60
+ align-items: center;
61
+ gap: 10px;
62
+ }
63
+
64
+ .menu-item {
65
+ padding: 1rem 1.25rem;
66
+ border-radius: 12px;
67
+ margin-bottom: 0.5rem;
68
+ cursor: pointer;
69
+ transition: all 0.3s ease;
70
+ display: flex;
71
+ align-items: center;
72
+ gap: 12px;
73
+ color: #94a3b8;
74
+ font-weight: 500;
75
+ }
76
+
77
+ .menu-item:hover {
78
+ background: rgba(255, 255, 255, 0.05);
79
+ color: white;
80
+ }
81
+
82
+ .menu-item.active {
83
+ background: var(--primary);
84
+ color: white;
85
+ }
86
+
87
+ .api-key-section {
88
+ margin-top: auto;
89
+ padding: 1.5rem;
90
+ background: rgba(255, 255, 255, 0.05);
91
+ border-radius: 16px;
92
+ font-size: 0.875rem;
93
+ }
94
+
95
+ .api-key-label {
96
+ display: block;
97
+ margin-bottom: 0.75rem;
98
+ font-weight: 500;
99
+ color: #e2e8f0;
100
+ }
101
+
102
+ .api-key-container {
103
+ position: relative;
104
+ width: 100%;
105
+ margin-bottom: 1rem;
106
+ }
107
+
108
+ .api-key-input {
109
+ width: 100%;
110
+ padding: 0.75rem;
111
+ padding-right: 2.5rem;
112
+ /* Space for toggle icon */
113
+ border-radius: 8px;
114
+ border: 1px solid rgba(255, 255, 255, 0.2);
115
+ background: rgba(255, 255, 255, 0.05);
116
+ color: #f8fafc;
117
+ font-size: 0.85rem;
118
+ transition: all 0.2s;
119
+ }
120
+
121
+ .api-key-input:focus {
122
+ outline: none;
123
+ background: rgba(255, 255, 255, 0.1);
124
+ border-color: var(--primary);
125
+ box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.3);
126
+ }
127
+
128
+ .toggle-password {
129
+ position: absolute;
130
+ right: 0.75rem;
131
+ top: 50%;
132
+ transform: translateY(-50%);
133
+ cursor: pointer;
134
+ color: #94a3b8;
135
+ font-size: 0.9rem;
136
+ user-select: none;
137
+ }
138
+
139
+ .toggle-password:hover {
140
+ color: white;
141
+ }
142
+
143
+ .key-guide {
144
+ color: #94a3b8;
145
+ font-size: 0.75rem;
146
+ line-height: 1.5;
147
+ }
148
+
149
+ .key-guide a {
150
+ color: #818cf8;
151
+ text-decoration: none;
152
+ }
153
+
154
+ /* Main Content Styling */
155
+ .main-content {
156
+ flex: 1;
157
+ margin-left: 320px;
158
+ padding: 3rem;
159
+ max-width: 1200px;
160
+ }
161
+
162
+ .header-box {
163
+ margin-bottom: 3rem;
164
+ }
165
+
166
+ .title {
167
+ font-size: 2.25rem;
168
+ font-weight: 700;
169
+ margin-bottom: 1rem;
170
+ color: #0f172a;
171
+ }
172
+
173
+ .description {
174
+ color: var(--text-muted);
175
+ line-height: 1.6;
176
+ font-size: 1.1rem;
177
+ }
178
+
179
+ .system-info {
180
+ display: grid;
181
+ grid-template-columns: repeat(3, 1fr);
182
+ gap: 1.5rem;
183
+ margin-top: 2rem;
184
+ }
185
+
186
+ .info-card {
187
+ background: white;
188
+ padding: 1.5rem;
189
+ border-radius: 20px;
190
+ box-shadow: 0 4px 15px rgba(0, 0, 0, 0.03);
191
+ border: 1px solid #f1f5f9;
192
+ }
193
+
194
+ .info-card h4 {
195
+ margin-bottom: 0.5rem;
196
+ color: var(--primary);
197
+ }
198
+
199
+ .info-card p {
200
+ font-size: 0.9rem;
201
+ color: var(--text-muted);
202
+ line-height: 1.5;
203
+ }
204
+
205
+ /* Form Styling */
206
+ .survey-form {
207
+ background: white;
208
+ padding: 2.5rem;
209
+ border-radius: 24px;
210
+ box-shadow: 0 10px 40px rgba(0, 0, 0, 0.04);
211
+ margin-bottom: 3rem;
212
+ border: 1px solid #f1f5f9;
213
+ }
214
+
215
+ .form-grid {
216
+ display: grid;
217
+ grid-template-columns: repeat(2, 1fr);
218
+ gap: 1.5rem;
219
+ margin-bottom: 2rem;
220
+ }
221
+
222
+ .input-group {
223
+ display: flex;
224
+ flex-direction: column;
225
+ gap: 0.5rem;
226
+ }
227
+
228
+ .input-group.full {
229
+ grid-column: span 2;
230
+ }
231
+
232
+ label {
233
+ font-weight: 600;
234
+ font-size: 0.9rem;
235
+ color: #475569;
236
+ }
237
+
238
+ select,
239
+ input[type="text"],
240
+ input[type="number"],
241
+ textarea {
242
+ padding: 0.875rem;
243
+ border-radius: 12px;
244
+ border: 1px solid #e2e8f0;
245
+ background: #f8fafc;
246
+ font-size: 1rem;
247
+ transition: all 0.2s;
248
+ }
249
+
250
+ select:focus,
251
+ input:focus,
252
+ textarea:focus {
253
+ outline: none;
254
+ border-color: var(--primary);
255
+ box-shadow: 0 0 0 4px rgba(99, 102, 241, 0.1);
256
+ background: white;
257
+ }
258
+
259
+ textarea {
260
+ resize: vertical;
261
+ min-height: 100px;
262
+ }
263
+
264
+ .btn-submit {
265
+ background: linear-gradient(135deg, var(--primary), var(--primary-dark));
266
+ color: white;
267
+ padding: 1rem 2rem;
268
+ border-radius: 14px;
269
+ border: none;
270
+ font-weight: 600;
271
+ font-size: 1.1rem;
272
+ cursor: pointer;
273
+ width: 100%;
274
+ transition: all 0.3s;
275
+ box-shadow: 0 4px 15px rgba(79, 70, 229, 0.3);
276
+ }
277
+
278
+ .btn-submit:hover {
279
+ transform: translateY(-2px);
280
+ box-shadow: 0 8px 25px rgba(79, 70, 229, 0.4);
281
+ }
282
+
283
+ .btn-submit:active {
284
+ transform: translateY(0);
285
+ }
286
+
287
+ .btn-submit:disabled {
288
+ background: #cbd5e1;
289
+ box-shadow: none;
290
+ cursor: not-allowed;
291
+ }
292
+
293
+ .btn-reset {
294
+ background: white;
295
+ color: #64748b;
296
+ padding: 1rem 1.5rem;
297
+ border-radius: 14px;
298
+ border: 1px solid #e2e8f0;
299
+ font-weight: 600;
300
+ font-size: 1.1rem;
301
+ cursor: pointer;
302
+ transition: all 0.2s;
303
+ }
304
+
305
+ .btn-reset:hover {
306
+ background: #f8fafc;
307
+ border-color: #cbd5e1;
308
+ color: #1e293b;
309
+ }
310
+
311
+ .button-row {
312
+ display: flex;
313
+ gap: 1rem;
314
+ }
315
+
316
+ /* Results Styling */
317
+ .results-container {
318
+ display: flex;
319
+ flex-direction: column;
320
+ gap: 1.5rem;
321
+ }
322
+
323
+ .result-card {
324
+ background: white;
325
+ padding: 2rem;
326
+ border-radius: 20px;
327
+ box-shadow: 0 4px 20px rgba(0, 0, 0, 0.03);
328
+ border: 1px solid #f1f5f9;
329
+ animation: slideIn 0.5s ease forwards;
330
+ opacity: 0;
331
+ }
332
+
333
+ @keyframes slideIn {
334
+ from {
335
+ opacity: 0;
336
+ transform: translateY(20px);
337
+ }
338
+
339
+ to {
340
+ opacity: 1;
341
+ transform: translateY(0);
342
+ }
343
+ }
344
+
345
+ .avatar-info {
346
+ display: flex;
347
+ align-items: center;
348
+ gap: 1rem;
349
+ margin-bottom: 1.5rem;
350
+ padding-bottom: 1rem;
351
+ border-bottom: 1px solid #f1f5f9;
352
+ }
353
+
354
+ .avatar-icon {
355
+ width: 48px;
356
+ height: 48px;
357
+ background: #e0e7ff;
358
+ border-radius: 12px;
359
+ display: flex;
360
+ align-items: center;
361
+ justify-content: center;
362
+ color: var(--primary);
363
+ font-weight: 700;
364
+ }
365
+
366
+ .avatar-meta {
367
+ font-size: 0.85rem;
368
+ color: var(--text-muted);
369
+ }
370
+
371
+ .response-text {
372
+ line-height: 1.8;
373
+ font-size: 1.05rem;
374
+ color: #334155;
375
+ white-space: pre-wrap;
376
+ }
377
+
378
+ .context-chips {
379
+ display: flex;
380
+ flex-wrap: wrap;
381
+ gap: 0.5rem;
382
+ margin-top: 1.5rem;
383
+ }
384
+
385
+ .chip {
386
+ padding: 0.25rem 0.75rem;
387
+ background: #f1f5f9;
388
+ border-radius: 20px;
389
+ font-size: 0.75rem;
390
+ color: var(--text-muted);
391
+ }
392
+
393
+ .loader {
394
+ display: none;
395
+ text-align: center;
396
+ padding: 2rem;
397
+ }
398
+
399
+ .spinner {
400
+ width: 40px;
401
+ height: 40px;
402
+ border: 4px solid #f3f3f3;
403
+ border-top: 4px solid var(--primary);
404
+ border-radius: 50%;
405
+ animation: spin 1s linear infinite;
406
+ margin: 0 auto 1rem;
407
+ }
408
+
409
+ @keyframes spin {
410
+ 0% {
411
+ transform: rotate(0deg);
412
+ }
413
+
414
+ 100% {
415
+ transform: rotate(360deg);
416
+ }
417
+ }
418
+
419
+ .notice-label {
420
+ font-size: 0.8rem;
421
+ color: var(--secondary);
422
+ font-weight: 500;
423
+ margin-top: 0.5rem;
424
+ }
425
+ </style>
426
+ </head>
427
+
428
+ <body>
429
+
430
+ <aside class="sidebar">
431
+ <div class="logo">
432
+ <span>โœจ</span> Issue Survey (Beta)
433
+ </div>
434
+
435
+ <div class="menu-item active" id="menu-actual" onclick="switchMode('actual')">
436
+ <span>๐Ÿ‘ค</span> Actual Respondent
437
+ </div>
438
+ <div class="menu-item" id="menu-synthetic" onclick="switchMode('synthetic')">
439
+ <span>๐Ÿ“Š</span> Statistical Synthetic
440
+ </div>
441
+
442
+ <div class="api-key-section">
443
+ <label class="api-key-label">Gemini API Key</label>
444
+ <div class="api-key-container">
445
+ <input type="password" id="api-key" class="api-key-input" placeholder="์—ฌ๊ธฐ์— API ํ‚ค ์ž…๋ ฅ">
446
+ <span class="toggle-password" id="toggle-key" onclick="toggleKeyVisibility()">๐Ÿ‘๏ธ</span>
447
+ </div>
448
+ <div class="key-guide">
449
+ <p>Gemini API ํ‚ค๊ฐ€ ์—†์œผ์‹ ๊ฐ€์š”?</p>
450
+ <p><a href="https://aistudio.google.com/app/apikey" target="_blank">Google AI Studio</a>์—์„œ ๋ฌด๋ฃŒ๋กœ ๋ฐœ๊ธ‰๋ฐ›์œผ์‹ค ์ˆ˜
451
+ ์žˆ์Šต๋‹ˆ๋‹ค.</p>
452
+ </div>
453
+ </div>
454
+ </aside>
455
+
456
+ <main class="main-content">
457
+ <div class="header-box">
458
+ <h1 class="title">Persona Issue Survey (Beta version)</h1>
459
+ <p class="description">2020๋…„ 7์›”(1์ฐจ)๋ถ€ํ„ฐ 2025๋…„ 9์›”(164์ฐจ)๊นŒ์ง€ ์ˆ˜ํ–‰๋œ <b>์ „๊ตญ์ง€ํ‘œ์กฐ์‚ฌ(NBS)</b> ๋ฐ์ดํ„ฐ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ํŠน์ • ๊ณ„์ธต์˜ ๋ชฉ์†Œ๋ฆฌ๋ฅผ
460
+ ์‹œ๋ฎฌ๋ ˆ์ด์…˜ํ•ฉ๋‹ˆ๋‹ค.</p>
461
+ <p style="margin-top: 0.5rem; font-size: 0.85rem; color: #f43f5e; font-weight: 500;">โš ๏ธ ๋ณธ ์‹œ์Šคํ…œ์€ ์‚ฌํšŒ์  ์ด์Šˆ ๋ถ„์„์šฉ์ด๋ฉฐ
462
+ ์ธ๋ฌผ ํ‰๊ฐ€์—๋Š” ๋ถ€์ ํ•ฉํ•ฉ๋‹ˆ๋‹ค. ์‹œ๋ฎฌ๋ ˆ์ด์…˜ ๊ฒฐ๊ณผ๋Š” ์ฐธ๊ณ ์šฉ์œผ๋กœ๋งŒ ์‚ฌ์šฉํ•˜์‹ญ์‹œ์˜ค.</p>
463
+
464
+ <div class="system-info">
465
+ <div class="info-card">
466
+ <h4>๐Ÿ‘ค Actual vs ๐Ÿ“Š Synthetic</h4>
467
+ <p><b>Actual</b>: ์‹ค์ œ ๊ฐœ๋ณ„ ์‘๋‹ต์ž์˜ ์ด๋ ฅ๊ณผ ๊ฐ€์น˜๊ด€์„ ์ถ”์ ํ•˜์—ฌ ์ƒ์ƒํ•œ ๊ฐœ์ธ์˜ ๋ชฉ์†Œ๋ฆฌ๋ฅผ ์‹œ๋‚˜๋ฆฌ์˜คํ™” ํ•ฉ๋‹ˆ๋‹ค. (์‹ฌ์ธต ์ธํ„ฐ๋ทฐ ๋Œ€์šฉ)<br>
468
+ <b>Synthetic</b>: ๊ทธ๋ฃน ์ „์ฒด์˜ ํ†ต๊ณ„์  ๋‹ต๋ณ€ ๋ถ„ํฌ๋ฅผ ์š”์•ฝํ•˜์—ฌ ์ง‘๋‹จ์˜ ํ‰๊ท ์ ์ด๊ณ  ์ „ํ˜•์ ์ธ ๊ฒฝํ–ฅ์„ฑ์„ ๋„์ถœํ•ฉ๋‹ˆ๋‹ค. (์—ฌ๋ก  ์ง€ํ˜• ๋ถ„์„์šฉ)
469
+ </p>
470
+ </div>
471
+ <div class="info-card">
472
+ <h4>๐Ÿ›  ๊ตฌํ˜„ ๊ณผ์ •</h4>
473
+ <p>163ํšŒ์˜ NBS ์„ค๋ฌธ(16๋งŒ๋ช…)์„ ํ†ตํ•ฉ ์ „์ฒ˜๋ฆฌํ•˜๊ณ , SBERT ์˜๋ฏธ๋ก ์  ๊ฒ€์ƒ‰์„ ํ†ตํ•ด ์•„๋ฐ”ํƒ€์˜ '๊ธฐ์–ต'์„ AI์—๊ฒŒ ์ฃผ์ž…ํ•ฉ๋‹ˆ๋‹ค.</p>
474
+ </div>
475
+ <div class="info-card">
476
+ <h4>๐Ÿ’ก ์‚ฌ์šฉ๋ฒ•</h4>
477
+ <p>1. <b>API ํ‚ค๋ฅผ ๋จผ์ € ์ž…๋ ฅ</b>ํ•˜์„ธ์š”. 2. ์ธ๊ตฌํ†ต๊ณ„ ์กฐ๊ฑด์„ ์„ค์ •ํ•˜๊ณ  ์ด์Šˆ ์งˆ๋ฌธ์„ ์ž…๋ ฅ(์ตœ์ €์ž„๊ธˆ ์‚ฌ๋ก€ ๋“ฑ ๊ตฌ์ฒด์  ๋ฐฐ๊ฒฝ ๊ถŒ์žฅ) ํ›„ ์‹œ์ž‘ํ•˜์„ธ์š”.</p>
478
+ </div>
479
+ </div>
480
+ </div>
481
+
482
+ <div class="survey-form">
483
+ <div class="form-grid">
484
+ <div class="input-group">
485
+ <label>์ง€์—ญ</label>
486
+ <select id="region">
487
+ <option value="">์ „์ฒด (์ „๊ตญ)</option>
488
+ <option value="์„œ์šธ">์„œ์šธ</option>
489
+ <option value="๊ฒฝ๊ธฐ">๊ฒฝ๊ธฐ</option>
490
+ <option value="์ธ์ฒœ">์ธ์ฒœ</option>
491
+ <option value="๋ถ€์‚ฐ">๋ถ€์‚ฐ</option>
492
+ <option value="๋Œ€๊ตฌ">๋Œ€๊ตฌ</option>
493
+ <option value="๊ด‘์ฃผ">๊ด‘์ฃผ</option>
494
+ <option value="๋Œ€์ „">๋Œ€์ „</option>
495
+ <option value="์šธ์‚ฐ">์šธ์‚ฐ</option>
496
+ <option value="์„ธ์ข…">์„ธ์ข…</option>
497
+ <option value="์ถฉ๋ถ">์ถฉ๋ถ</option>
498
+ <option value="์ถฉ๋‚จ">์ถฉ๋‚จ</option>
499
+ <option value="์ „๋ถ">์ „๋ถ</option>
500
+ <option value="์ „๋‚จ">์ „๋‚จ</option>
501
+ <option value="๊ฒฝ๋ถ">๊ฒฝ๋ถ</option>
502
+ <option value="๊ฒฝ๋‚จ">๊ฒฝ๋‚จ</option>
503
+ <option value="์ œ์ฃผ">์ œ์ฃผ</option>
504
+ <option value="๊ฐ•์›">๊ฐ•์›</option>
505
+ </select>
506
+ </div>
507
+ <div class="input-group">
508
+ <label>์„ฑ๋ณ„</label>
509
+ <select id="gender">
510
+ <option value="">์ „์ฒด (์„ฑ๋ณ„๋ฌด๊ด€)</option>
511
+ <option value="๋‚จ์ž">๋‚จ์ž</option>
512
+ <option value="์—ฌ์ž">์—ฌ์ž</option>
513
+ </select>
514
+ </div>
515
+ <div class="input-group">
516
+ <label>์—ฐ๋ น๋Œ€</label>
517
+ <select id="age">
518
+ <option value="">์ „์ฒด (์—ฐ๋ น๋ฌด๊ด€)</option>
519
+ <option value="18~29">20๋Œ€ (18~29)</option>
520
+ <option value="30~39">30๋Œ€ (30~39)</option>
521
+ <option value="40~49">40๋Œ€ (40~49)</option>
522
+ <option value="50~59">50๋Œ€ (50~59)</option>
523
+ <option value="60~69">60๋Œ€ (60~69)</option>
524
+ <option value="70~99">70๋Œ€ ์ด์ƒ</option>
525
+ </select>
526
+ </div>
527
+ <div class="input-group">
528
+ <label>๊ตฌ์ฒด์  ์—ฐ๋ น (์„ ํƒ)</label>
529
+ <input type="number" id="age-specific" placeholder="์˜ˆ: 32">
530
+ </div>
531
+ <div class="input-group">
532
+ <label>์ง์—…</label>
533
+ <select id="job">
534
+ <option value="">์ „์ฒด (์ง์—…๋ฌด๊ด€)</option>
535
+ <option value="ํ•™์ƒ">ํ•™์ƒ</option>
536
+ <option value="์‚ฌ๋ฌด/๊ธฐ์ˆ ์ง">์‚ฌ๋ฌด/๊ธฐ์ˆ ์ง</option>
537
+ <option value="์ž์˜์—…">์ž์˜์—…</option>
538
+ <option value="์ฃผ๋ถ€">์ฃผ๋ถ€</option>
539
+ <option value="๊ฒฝ์˜/๊ด€๋ฆฌ/์ „๋ฌธ์ง">๊ฒฝ์˜/๊ด€๋ฆฌ/์ „๋ฌธ์ง</option>
540
+ <option value="์ƒ์‚ฐ/๊ธฐ๋Šฅ/๋…ธ๋ฌด์ง">์ƒ์‚ฐ/๊ธฐ๋Šฅ/๋…ธ๋ฌด์ง</option>
541
+ <option value="๋†/๋ฆผ/์ˆ˜์‚ฐ์—…">๋†/๋ฆผ/์ˆ˜์‚ฐ์—…</option>
542
+ <option value="๋ฌด์ง/ํ‡ด์ง/๊ธฐํƒ€">๋ฌด์ง/ํ‡ด์ง/๊ธฐํƒ€</option>
543
+ </select>
544
+ </div>
545
+ <div class="input-group full">
546
+ <label>๋ถ„์„ ์ด์Šˆ (์งˆ๋ฌธ)</label>
547
+ <textarea id="question"
548
+ placeholder="์˜ˆ: ์ตœ๊ทผ ๊ณ ๋ฌผ๊ฐ€ ์ƒํ™ฉ์—์„œ ์„œ๋ฏผ ๊ฒฝ์ œ ์•ˆ์ •์„ ์œ„ํ•ด ์ตœ์ €์ž„๊ธˆ์„ ์ธ์ƒํ•ด์•ผ ํ•œ๋‹ค๋Š” ์ฃผ์žฅ์— ์–ผ๋งˆ๋‚˜ ๋™์˜ํ•˜์‹ญ๋‹ˆ๊นŒ? (1: ์ „ํ˜€ ๋™์˜ ์•ˆํ•จ ~ 5: ๋งค์šฐ ๋™์˜)"></textarea>
549
+ </div>
550
+ <div class="input-group" id="sample-group">
551
+ <label>์ƒ์„ฑํ•  ์•„๋ฐ”ํƒ€(์‘๋‹ต) ์ˆ˜</label>
552
+ <input type="number" id="sample" value="3" min="1" max="10">
553
+ <div id="sample-notice" class="notice-label">Actual ๋ชจ๋“œ๋Š” ์ตœ๋Œ€ 10๋ช…์œผ๋กœ ์ œํ•œ๋ฉ๋‹ˆ๋‹ค.</div>
554
+ </div>
555
+ </div>
556
+ <div class="button-row">
557
+ <button class="btn-submit" id="btn-submit" onclick="runSimulation()">์‹œ๋ฎฌ๋ ˆ์ด์…˜ ์‹œ์ž‘</button>
558
+ <button class="btn-reset" id="btn-reset" onclick="resetForm()">์ดˆ๊ธฐํ™”</button>
559
+ </div>
560
+ </div>
561
+
562
+ <div class="loader" id="loader">
563
+ <div class="spinner"></div>
564
+ <p>๋ฐ์ดํ„ฐ์—์„œ ํŽ˜๋ฅด์†Œ๋‚˜๋ฅผ ์ถ”์ถœํ•˜๊ณ  AI ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค...</p>
565
+ </div>
566
+
567
+ <div class="results-container" id="results">
568
+ <!-- Results will be injected here -->
569
+ </div>
570
+ </main>
571
+
572
+ <script>
573
+ let currentMode = 'actual';
574
+
575
// Flip the API-key field between masked and plain text and swap the
// toggle icon to match.
function toggleKeyVisibility() {
    const keyInput = document.getElementById('api-key');
    const toggleIcon = document.getElementById('toggle-key');
    const reveal = keyInput.type === 'password';

    keyInput.type = reveal ? 'text' : 'password';
    toggleIcon.innerText = reveal ? '๐Ÿ™ˆ' : '๐Ÿ‘๏ธ';
}
586
+
587
// Select the simulation mode ('actual' or 'synthetic'): highlight its menu
// entry and adjust the sample-size control accordingly.
function switchMode(mode) {
    currentMode = mode;

    for (const entry of document.querySelectorAll('.menu-item')) {
        entry.classList.remove('active');
    }
    document.getElementById(`menu-${mode}`).classList.add('active');

    const sampleGroup = document.getElementById('sample-group');
    const sampleInput = document.getElementById('sample');
    const notice = document.getElementById('sample-notice');

    if (mode !== 'actual') {
        // Synthetic is always 1 summarized avatar per request in current implementation
        sampleGroup.style.display = "none";
        sampleInput.value = 1;
        return;
    }

    sampleGroup.style.display = "flex";
    sampleInput.max = 10;
    if (sampleInput.value > 10) {
        sampleInput.value = 10;
    }
    notice.innerText = "Actual ๋ชจ๋“œ๋Š” ์ตœ๋Œ€ 10๋ช…์œผ๋กœ ์ œํ•œ๋ฉ๋‹ˆ๋‹ค. (๋Œ€๊ทœ๋ชจ ๋ถ„์„์€ Synthetic ์ถ”์ฒœ)";
    notice.style.color = "#ec4899";
}
608
+
609
// Clear every form field, restore the mode's default sample size and drop
// any rendered results.
function resetForm() {
    const clearedFields = ['question', 'region', 'gender', 'age', 'age-specific', 'job'];
    for (const fieldId of clearedFields) {
        document.getElementById(fieldId).value = "";
    }

    const defaultSample = currentMode === 'actual' ? "3" : "1";
    document.getElementById('sample').value = defaultSample;
    document.getElementById('results').innerHTML = "";
}
619
+
620
// Read the form, POST it to /simulate/<currentMode> with the API key in the
// X-API-Key header, and render the returned list. Shows the loader and
// disables the submit button while the request is in flight; any failure is
// reported through alert().
async function runSimulation() {
    const apiKey = document.getElementById('api-key').value.trim();
    const question = document.getElementById('question').value.trim();
    const region = document.getElementById('region').value;
    const gender = document.getElementById('gender').value;

    // Priority: specific age > age range
    const ageSpecific = document.getElementById('age-specific').value.trim();
    const ageRange = document.getElementById('age').value;
    const age = ageSpecific || ageRange;

    const job = document.getElementById('job').value;
    const sample = parseInt(document.getElementById('sample').value, 10);

    if (!apiKey) {
        alert("์‹œ์ž‘ํ•˜๋ ค๋ฉด Gemini API ํ‚ค๋ฅผ ์ž…๋ ฅํ•ด ์ฃผ์„ธ์š” (์‚ฌ์ด๋“œ๋ฐ” ํ•˜๋‹จ).");
        return;
    }

    if (!question) {
        alert("์งˆ๋ฌธ์„ ์ž…๋ ฅํ•ด ์ฃผ์„ธ์š”.");
        return;
    }

    const resultsDiv = document.getElementById('results');
    const loader = document.getElementById('loader');
    const btn = document.getElementById('btn-submit');

    resultsDiv.innerHTML = "";
    loader.style.display = "block";
    btn.disabled = true;

    // Empty selections are sent as null so the backend treats them as
    // "no filter".
    const payload = {
        question,
        region: region || null,
        gender: gender || null,
        age: age || null,
        job: job || null,
        sample
    };

    const endpoint = `/simulate/${currentMode}`;

    try {
        const response = await fetch(endpoint, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'X-API-Key': apiKey || ""
            },
            body: JSON.stringify(payload)
        });

        if (!response.ok) {
            let detail = null;
            try {
                const err = await response.json();
                detail = err && err.detail;
            } catch (parseError) {
                // Error body is not JSON (e.g. a proxy error page): fall
                // back to the generic message below.
            }
            // Validation errors carry a list of objects rather than a
            // string; serialise it so the alert does not show
            // "[object Object]".
            if (detail && typeof detail !== 'string') {
                detail = JSON.stringify(detail);
            }
            throw new Error(detail || "์‹œ๋ฎฌ๋ ˆ์ด์…˜ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค.");
        }

        const data = await response.json();
        renderResults(data);
    } catch (error) {
        alert(error.message);
    } finally {
        loader.style.display = "none";
        btn.disabled = false;
    }
}
687
+
688
// Render the list returned by /simulate/<mode> as result cards inside
// #results. `data` is an array of objects carrying `response`, optional
// `respondent_id` / `survey_round` / `demographics`, and either
// `referenced_context` or `referenced_stat_context` (arrays of strings).
function renderResults(data) {
    // Every interpolated value comes from the server (dataset fields or
    // model-generated text), so it is HTML-escaped before being placed into
    // innerHTML.
    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

    const resultsDiv = document.getElementById('results');

    if (currentMode === 'synthetic') {
        resultsDiv.innerHTML = `<h2 style="margin-bottom: 1.5rem;">์ง‘๋‹จ ๋Œ€ํ‘œ(Representative) ๋ถ„์„ ๊ฒฐ๊ณผ</h2>`;
    } else {
        resultsDiv.innerHTML = `<h2 style="margin-bottom: 1.5rem;">์ด ${data.length}๋ช…์˜ ๋‹ต๋ณ€ ๊ฒฐ๊ณผ</h2>`;
    }

    data.forEach((item, index) => {
        const card = document.createElement('div');
        card.className = "result-card";
        // Stagger the slide-in animation so cards appear one after another.
        card.style.animationDelay = `${index * 0.1}s`;

        // Items without a respondent_id are synthetic group representatives.
        const idLabel = item.respondent_id ? `ID: ${item.respondent_id}` : `Group Representative (Representative)`;
        const roundLabel = item.survey_round ? ` | Survey Round: ${item.survey_round}` : "";

        let demographicsStr = "";
        if (item.demographics) {
            const d = item.demographics;
            demographicsStr = `${d.region || '์ „๊ตญ'} / ${d.age || '์—ฐ๋ น๋Œ€'} / ${d.gender || '์„ฑ๋ณ„'} / ${d.job || '์ง์—…'}`;
        }

        // One chip per referenced context chunk: its first line, minus a
        // leading "Q: " label.
        const contextHtml = (item.referenced_context || item.referenced_stat_context || [])
            .map(c => `<span class="chip">${escapeHtml(c.split('\n')[0].replace('Q: ', ''))}</span>`)
            .join('');

        card.innerHTML = `
            <div class="avatar-info">
                <div class="avatar-icon">${item.respondent_id ? 'P' : 'S'}</div>
                <div>
                    <div style="font-weight: 700; color: #0f172a;">${escapeHtml(idLabel)}${escapeHtml(roundLabel)}</div>
                    <div class="avatar-meta">${escapeHtml(demographicsStr)}</div>
                </div>
            </div>
            <div class="response-text">${escapeHtml(item.response)}</div>
            <div class="api-key-label" style="margin-top: 1.5rem; font-size: 0.75rem; color: #94a3b8;">์ฐธ์กฐ๋œ ๊ณผ๊ฑฐ ๋ฐ์ดํ„ฐ ํ•„๋“œ:</div>
            <div class="context-chips">
                ${contextHtml}
            </div>
        `;
        resultsDiv.appendChild(card);
    });
}
733
+ </script>
734
+ </body>
735
+
736
+ </html>
nbs_questions_index.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fdd94e1940b369064abfc09632d968529e29a61247a07f1586dd9941fd6591b
3
+ size 6425603
rag_engine.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import pandas as pd
4
+ import numpy as np
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ from sentence_transformers import SentenceTransformer
7
+ import google.generativeai as genai
8
+ from dotenv import load_dotenv
9
+
10
# Environment bootstrap. The first call relies on python-dotenv's default
# lookup; if GEMINI_KEY is still unset or empty afterwards, a second attempt
# reads the .env file one directory above this module.
load_dotenv()
if os.getenv("GEMINI_KEY") in (None, ""):
    load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env'))
14
+
15
class NBSRagEngine:
    """Ground persona answers in stored survey responses.

    The engine loads a respondent table and a pre-embedded question index from
    parquet files, finds indexed questions similar to a new question, filters
    respondents by demographics, collects a respondent's past answers and asks
    a Gemini model to answer a new question in that respondent's voice.
    """

    # Gemini model used for the shared client and for per-request clients.
    LLM_MODEL_NAME = 'gemini-2.5-flash-lite'
    # Model tried when constructing the primary model raises.
    LLM_FALLBACK_MODEL_NAME = 'gemini-1.0-pro'
    # File name looked up when ``db_path`` itself does not exist.
    QUESTION_INDEX_FILENAME = "nbs_questions_index.parquet"

    # Accepted spellings for each gender, and the label stored in the data.
    _MALE_LABELS = ('๋‚จ', '๋‚จ์ž', '๋‚จ์„ฑ')
    _MALE_CANONICAL = '๋‚จ์ž'
    _FEMALE_LABELS = ('์—ฌ', '์—ฌ์ž', '์—ฌ์„ฑ')
    _FEMALE_CANONICAL = '์—ฌ์ž'

    # Region spellings (short and official names) mapped to the short name.
    _REGION_ALIASES = {
        '์„œ์šธ': '์„œ์šธ', '์„œ์šธํŠน๋ณ„์‹œ': '์„œ์šธ',
        '๊ฒฝ๊ธฐ': '๊ฒฝ๊ธฐ', '๊ฒฝ๊ธฐ๋„': '๊ฒฝ๊ธฐ',
        '์ธ์ฒœ': '์ธ์ฒœ', '์ธ์ฒœ๊ด‘์—ญ์‹œ': '์ธ์ฒœ',
        '๋ถ€์‚ฐ': '๋ถ€์‚ฐ', '๋ถ€์‚ฐ๊ด‘์—ญ์‹œ': '๋ถ€์‚ฐ',
        '๋Œ€๊ตฌ': '๋Œ€๊ตฌ', '๋Œ€๊ตฌ๊ด‘์—ญ์‹œ': '๋Œ€๊ตฌ',
        '๊ด‘์ฃผ': '๊ด‘์ฃผ', '๊ด‘์ฃผ๊ด‘์—ญ์‹œ': '๊ด‘์ฃผ',
        '๋Œ€์ „': '๋Œ€์ „', '๋Œ€์ „๊ด‘์—ญ์‹œ': '๋Œ€์ „',
        '์šธ์‚ฐ': '์šธ์‚ฐ', '์šธ์‚ฐ๊ด‘์—ญ์‹œ': '์šธ์‚ฐ',
        '์„ธ์ข…': '์„ธ์ข…', '์„ธ์ข…ํŠน๋ณ„์ž์น˜์‹œ': '์„ธ์ข…',
        '๊ฐ•์›': '๊ฐ•์›', '๊ฐ•์›๋„': '๊ฐ•์›',
        '์ถฉ๋ถ': '์ถฉ๋ถ', '์ถฉ์ฒญ๋ถ๋„': '์ถฉ๋ถ',
        '์ถฉ๋‚จ': '์ถฉ๋‚จ', '์ถฉ์ฒญ๋‚จ๋„': '์ถฉ๋‚จ',
        '์ „๋ถ': '์ „๋ถ', '์ „๋ผ๋ถ๋„': '์ „๋ถ',
        '์ „๋‚จ': '์ „๋‚จ', '์ „๋ผ๋‚จ๋„': '์ „๋‚จ',
        '๊ฒฝ๋ถ': '๊ฒฝ๋ถ', '๊ฒฝ์ƒ๋ถ๋„': '๊ฒฝ๋ถ',
        '๊ฒฝ๋‚จ': '๊ฒฝ๋‚จ', '๊ฒฝ์ƒ๋‚จ๋„': '๊ฒฝ๋‚จ',
        '์ œ์ฃผ': '์ œ์ฃผ', '์ œ์ฃผํŠน๋ณ„์ž์น˜๋„': '์ œ์ฃผ'
    }

    # Substrings that mark a column as persona-defining in
    # get_context_for_responder.
    _PERSONA_KEYWORDS = ('์ •์น˜', '์„ฑํ–ฅ', '์ด๋…', '์ง€์ง€', 'ํ›„๋ณด', '๊ฒฝ์ œ', '๋ถ€๋™์‚ฐ')

    def __init__(self, parquet_path, db_path, model_name):
        """Load the survey data, the question index, the encoder and Gemini.

        Args:
            parquet_path: Path of the respondent parquet file.
            db_path: Path of the question-index parquet file. When it does not
                exist, ``nbs_questions_index.parquet`` is looked up next to it
                and then next to this module.
            model_name: SentenceTransformer model name used to embed queries.

        Raises:
            ValueError: If the ``GEMINI_KEY`` environment variable is not set.
        """
        print("Initializing NBSRagEngine (Stable Mode)...")
        self.df = pd.read_parquet(parquet_path)
        # Ensure age is numeric for range filtering; unparsable values become NaN.
        if 'age' in self.df.columns:
            self.df['age'] = pd.to_numeric(self.df['age'], errors='coerce')

        # Resolve the question index. The first candidate is the original
        # fallback, so an unresolved lookup fails on the same path as before.
        idx_path = db_path
        if not os.path.exists(idx_path):
            candidates = [
                os.path.join(os.path.dirname(idx_path), self.QUESTION_INDEX_FILENAME),
                os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             self.QUESTION_INDEX_FILENAME),
            ]
            idx_path = next((p for p in candidates if os.path.exists(p)), candidates[0])

        self.q_df = pd.read_parquet(idx_path)
        self.q_vectors = np.array(self.q_df['vector'].tolist())
        self.questions = self.q_df['question'].tolist()

        self.model = SentenceTransformer(model_name)

        # Configure Gemini with the server's own key.
        gemini_key = os.getenv("GEMINI_KEY")
        if not gemini_key:
            raise ValueError("GEMINI_KEY not found in .env")
        # Remembered so generate_response can restore it after a per-request key.
        self._default_api_key = gemini_key
        genai.configure(api_key=gemini_key)
        # NOTE(review): constructing GenerativeModel presumably performs no
        # network call, so this fallback may never trigger - confirm.
        try:
            self.llm = genai.GenerativeModel(self.LLM_MODEL_NAME)
            print(f"Using Gemini model: {self.LLM_MODEL_NAME}.")
        except Exception as e:
            print(f"Failed to load {self.LLM_MODEL_NAME}: {e}. "
                  f"Falling back to {self.LLM_FALLBACK_MODEL_NAME}.")
            self.llm = genai.GenerativeModel(self.LLM_FALLBACK_MODEL_NAME)

    def find_similar_questions(self, target_question, top_k=5):
        """Return up to ``top_k`` indexed questions most similar to the query.

        Similarity is the cosine similarity between the encoded query and the
        stored question vectors; results are ordered most similar first. A
        non-positive ``top_k`` or an empty index yields an empty list.
        """
        # Guard first: with top_k == 0 the slice [-0:] would select everything.
        if top_k <= 0 or not self.questions:
            return []
        query_vec = self.model.encode([target_question])
        sims = cosine_similarity(query_vec, self.q_vectors)[0]
        top_indices = np.argsort(sims)[::-1][:top_k]
        return [self.questions[i] for i in top_indices]

    def normalize_input(self, gender=None, region=None):
        """Map user-supplied gender/region spellings to the stored labels.

        Values are stripped of surrounding whitespace; unknown spellings are
        passed through unchanged. Falsy inputs normalize to ``None``.

        Returns:
            A ``(gender, region)`` tuple.
        """
        norm_gender = None
        if gender:
            g = str(gender).strip()
            if g in NBSRagEngine._MALE_LABELS:
                norm_gender = NBSRagEngine._MALE_CANONICAL
            elif g in NBSRagEngine._FEMALE_LABELS:
                norm_gender = NBSRagEngine._FEMALE_CANONICAL
            else:
                norm_gender = g

        norm_region = None
        if region:
            r = str(region).strip()
            norm_region = NBSRagEngine._REGION_ALIASES.get(r, r)

        return norm_gender, norm_region

    @staticmethod
    def _parse_age_filter(age):
        """Parse an age filter into inclusive ``(low, high)`` bounds.

        Accepts ``"start~end"`` (integers) or a single number. Returns ``None``
        when the value cannot be parsed, so the caller can skip the filter.
        """
        age_str = str(age).strip()
        try:
            if '~' in age_str:
                # Unpacking also raises ValueError for "20~" or "1~2~3".
                start_age, end_age = (int(part) for part in age_str.split('~'))
                return start_age, end_age
            exact = float(age_str)
            return exact, exact
        except ValueError:
            return None

    def filter_respondents(self, gender=None, age=None, region=None, job=None):
        """Return the rows of ``self.df`` matching every provided criterion.

        Args:
            gender: Gender label; normalized via ``normalize_input``.
            age: Exact age or an inclusive ``"start~end"`` range. A value that
                cannot be parsed is ignored rather than raising.
            region: Region label; normalized via ``normalize_input``.
            job: Job label, compared after stripping whitespace.

        Returns:
            The filtered DataFrame, keeping the original index.
        """
        # Build the mask on the frame's own index so the boolean operations
        # and the final selection align whatever index the parquet carried.
        mask = pd.Series(True, index=self.df.index)

        # Normalize inputs to match Parquet categories
        norm_gender, norm_region = self.normalize_input(gender, region)

        if norm_gender:
            mask &= (self.df['gender'] == norm_gender)
        if age:
            bounds = self._parse_age_filter(age)
            if bounds is not None:
                low, high = bounds
                mask &= (self.df['age'] >= low) & (self.df['age'] <= high)
        if norm_region:
            mask &= (self.df['region'] == norm_region)
        if job:
            mask &= (self.df['job'] == str(job).strip())

        return self.df[mask]

    @staticmethod
    def _has_answer(ans):
        """Return True when ``ans`` is neither missing nor blank text."""
        return bool(pd.notna(ans)) and str(ans).strip() != ""

    def get_context_for_responder(self, responder_row, similar_questions):
        """Collect a respondent's past answers as ``Q:``/``A:`` text.

        Persona-defining columns (labels containing one of the persona
        keywords) come first, followed by the semantically similar questions.
        Missing or blank answers are skipped and no entry is emitted twice.

        Args:
            responder_row: One respondent row (column label -> answer).
            similar_questions: Column labels of the similar questions.

        Returns:
            The entries joined by blank lines; an empty string if none apply.
        """
        context = []
        emitted = set()  # entries already in ``context``, for O(1) dedup
        referenced_qs = set(similar_questions)

        def add_entry(question, ans):
            entry = f"Q: {question}\nA: {ans}"
            if entry not in emitted:
                emitted.add(entry)
                context.append(entry)

        # 1. Key persona columns that are not already among the similar questions.
        for col in responder_row.index:
            # Non-string labels cannot contain a keyword; skip them instead of
            # failing on the substring test.
            if not isinstance(col, str) or col in referenced_qs:
                continue
            if any(k in col for k in NBSRagEngine._PERSONA_KEYWORDS):
                ans = responder_row[col]
                if self._has_answer(ans):
                    add_entry(col, ans)
                    referenced_qs.add(col)

        # 2. Semantically similar questions this respondent actually answered.
        for q in similar_questions:
            if q in responder_row.index:
                ans = responder_row[q]
                if self._has_answer(ans):
                    add_entry(q, ans)

        return "\n\n".join(context)

    def generate_response(self, persona_desc, context, target_question, api_key=None):
        """Ask Gemini to answer ``target_question`` as the described persona.

        Args:
            persona_desc: Text describing the persona.
            context: Past answers, as produced by ``get_context_for_responder``.
            target_question: The new survey question.
            api_key: Optional caller-supplied Gemini key used for this call
                only; the engine's shared model is used when omitted.

        Returns:
            The text of the model's response.
        """
        prompt = f"""๋‹น์‹ ์€ ๋‹ค์Œ๊ณผ ๊ฐ™์€ ํŽ˜๋ฅด์†Œ๋‚˜๋ฅผ ๊ฐ€์ง„ ํ•œ๊ตญ์ธ ์‘๋‹ต์ž์ž…๋‹ˆ๋‹ค.
---
ํŽ˜๋ฅด์†Œ๋‚˜: {persona_desc}

๊ณผ๊ฑฐ ๋‹น์‹ ์€ ์œ ์‚ฌํ•œ ์งˆ๋ฌธ์— ๋Œ€ํ•ด ๋‹ค์Œ๊ณผ ๊ฐ™์ด ๋‹ต๋ณ€ํ•œ ์ด๋ ฅ์ด ์žˆ์Šต๋‹ˆ๋‹ค:
{context}
---

์œ„์˜ ๊ฐ€์น˜๊ด€๊ณผ ๊ฒฝํ—˜์„ ๋ฐ”ํƒ•์œผ๋กœ, ๋‹ค์Œ์˜ ์ƒˆ๋กœ์šด ์งˆ๋ฌธ์— ๋Œ€ํ•ด ๋‹น์‹ ์ด ํ•  ๋ฒ•ํ•œ ๋‹ต๋ณ€์„ ์„ ํƒํ•˜๊ฑฐ๋‚˜ ์ž‘์„ฑํ•˜์‹ญ์‹œ์˜ค.
๋‹ต๋ณ€์€ ์‹ค์ œ ์„ค๋ฌธ์กฐ์‚ฌ์— ์‘ํ•˜๋“ฏ ์ž์—ฐ์Šค๋Ÿฝ๊ณ  ๊ตฌ์ฒด์ ์ด์–ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.

์งˆ๋ฌธ: {target_question}

๋‹ต๋ณ€:"""

        if not api_key:
            return self.llm.generate_content(prompt).text

        # genai.configure is process-wide, so the caller's key must not stay
        # active after this call: restore the server key whatever happens.
        # NOTE(review): still not safe under concurrent requests, since two
        # calls can interleave their configure() calls - confirm how the app
        # serves requests, or move to a per-request client.
        genai.configure(api_key=api_key)
        try:
            model = genai.GenerativeModel(self.LLM_MODEL_NAME)
            return model.generate_content(prompt).text
        finally:
            default_key = getattr(self, '_default_api_key', None) or os.getenv("GEMINI_KEY")
            if default_key:
                genai.configure(api_key=default_key)
173
+
174
if __name__ == "__main__":
    # Manual smoke test: build the engine from local files and print the
    # indexed questions most similar to a sample question.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    parquet = os.path.join(script_dir, "consolidated_nbs_data.parquet")
    # Candidate index locations, in priority order: the two legacy paths under
    # the parent directory, then the index shipped next to this module. The
    # first one that exists is used; if none exists the legacy path is kept so
    # the failure mode is unchanged.
    parent_dir = os.path.dirname(script_dir)
    db_candidates = [
        os.path.join(parent_dir, "nbs_db_stable"),
        os.path.join(parent_dir, "nbs_questions_index.parquet"),
        os.path.join(script_dir, "nbs_questions_index.parquet"),
    ]
    db = next((p for p in db_candidates if os.path.exists(p)), db_candidates[0])
    model = "snunlp/KR-SBERT-V40K-klueNLI-augSTS"

    engine = NBSRagEngine(parquet, db, model)
    sim_qs = engine.find_similar_questions("๋Œ€ํ†ต๋ น ๊ตญ์ • ์šด์˜์— ๋Œ€ํ•ด ์–ด๋–ป๊ฒŒ ์ƒ๊ฐํ•˜์‹ญ๋‹ˆ๊นŒ?")
    print(f"Similar Questions: {sim_qs}")
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pandas
4
+ pyarrow
5
+ numpy
6
+ scikit-learn
7
+ sentence-transformers
8
+ google-generativeai
9
+ python-dotenv
10
+ tqdm