SyngyeonTak committed on
Commit
e29b52a
ยท
1 Parent(s): 64fa191

region_extractor updates

Browse files
Files changed (1) hide show
  1. region_extractor.py +52 -2
region_extractor.py CHANGED
@@ -4,15 +4,25 @@ import faiss
4
  import numpy as np
5
  from sentence_transformers import SentenceTransformer
6
  from huggingface_hub import hf_hub_download
 
7
 
8
  DATA_REPO = "Syngyeon/seoulalpha-data"
9
  MODEL_NAME = "jhgan/ko-sbert-nli"
10
 
 
 
 
11
  # ๋กœ๋“œ
12
  def _load_region_index():
13
  try:
14
- index_path = hf_hub_download(repo_id=DATA_REPO, repo_type="dataset", filename="data/faiss/region_db/faiss_region_semantic.index")
15
- metadata_path = hf_hub_download(repo_id=DATA_REPO, repo_type="dataset", filename="data/faiss/region_db/metadata_region_semantic.jsonl")
 
 
 
 
 
 
16
 
17
  index = faiss.read_index(index_path)
18
  model = SentenceTransformer(MODEL_NAME)
@@ -45,3 +55,43 @@ def extract_region_semantic(user_query, top_k=5):
45
  if vid in region_meta:
46
  results.append(region_meta[vid]["region_name"])
47
  return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import numpy as np
5
  from sentence_transformers import SentenceTransformer
6
  from huggingface_hub import hf_hub_download
7
+ from openai import OpenAI # ๐Ÿ”น ์ถ”๊ฐ€
8
 
9
  DATA_REPO = "Syngyeon/seoulalpha-data"
10
  MODEL_NAME = "jhgan/ko-sbert-nli"
11
 
12
+ # OpenAI ํด๋ผ์ด์–ธํŠธ ์ดˆ๊ธฐํ™”
13
+ client = OpenAI(api_key=os.getenv("API_KEY")) # ๐Ÿ”น ์ถ”๊ฐ€
14
+
15
  # ๋กœ๋“œ
16
  def _load_region_index():
17
  try:
18
+ index_path = hf_hub_download(
19
+ repo_id=DATA_REPO, repo_type="dataset",
20
+ filename="data/faiss/region_db/faiss_region_semantic.index"
21
+ )
22
+ metadata_path = hf_hub_download(
23
+ repo_id=DATA_REPO, repo_type="dataset",
24
+ filename="data/faiss/region_db/metadata_region_semantic.jsonl"
25
+ )
26
 
27
  index = faiss.read_index(index_path)
28
  model = SentenceTransformer(MODEL_NAME)
 
55
  if vid in region_meta:
56
  results.append(region_meta[vid]["region_name"])
57
  return results
58
+
59
+
60
def extract_region_from_query(user_query):
    """Extract Korean administrative-region keywords from a user query via an LLM.

    Sends the query to the chat model together with a fixed system prompt
    that instructs it to answer with a JSON object of the form
    ``{"regions": ["keyword1", ...]}``, then validates and returns the list.

    Args:
        user_query: Free-form (Korean) travel-related question from the user.

    Returns:
        list[str]: Region-name keywords usable for address filtering.
        An empty list when no region is mentioned, when the model returns
        an unexpected shape, or when the API call / JSON parsing fails.
    """
    print("[LLM] ์‚ฌ์šฉ์ž ์ฟผ๋ฆฌ์—์„œ ์ง€์—ญ๋ช… ํ‚ค์›Œ๋“œ๋ฅผ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค...")

    system_prompt = """
    ๋‹น์‹ ์€ ์‚ฌ์šฉ์ž์˜ ์—ฌํ–‰ ๊ด€๋ จ ์งˆ๋ฌธ์—์„œ '๋Œ€ํ•œ๋ฏผ๊ตญ ํ–‰์ •๊ตฌ์—ญ' ํ‚ค์›Œ๋“œ๋ฅผ ์ถ”์ถœํ•˜๋Š” AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค.
    ์‚ฌ์šฉ์ž์˜ ์งˆ๋ฌธ์„ ๋ถ„์„ํ•˜์—ฌ, ์ฃผ์†Œ ํ•„ํ„ฐ๋ง์— ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ๋Š” ํ‚ค์›Œ๋“œ ๋ชฉ๋ก์„ JSON ํ˜•์‹์œผ๋กœ ๋ฐ˜ํ™˜ํ•ด ์ฃผ์„ธ์š”.
    ๊ฒฐ๊ณผ๋Š” ๋ฐ˜๋“œ์‹œ {"regions": ["ํ‚ค์›Œ๋“œ1", "ํ‚ค์›Œ๋“œ2", ...]} ํ˜•ํƒœ์—ฌ์•ผ ํ•ฉ๋‹ˆ๋‹ค.

    - "์ „๋ผ๋„"๋Š” "์ „๋ถ", "์ „๋‚จ", "๊ด‘์ฃผ"๋กœ ํ•ด์„ํ•ฉ๋‹ˆ๋‹ค.
    - "๊ฒฝ์ƒ๋„"๋Š” "๊ฒฝ๋ถ", "๊ฒฝ๋‚จ", "๋ถ€์‚ฐ", "๋Œ€๊ตฌ", "์šธ์‚ฐ"์œผ๋กœ ํ•ด์„ํ•ฉ๋‹ˆ๋‹ค.
    - "์ถฉ์ฒญ๋„"๋Š” "์ถฉ๋ถ", "์ถฉ๋‚จ", "๋Œ€์ „", "์„ธ์ข…"์œผ๋กœ ํ•ด์„ํ•ฉ๋‹ˆ๋‹ค.
    - "์„œ์šธ ๊ทผ๊ต"๋Š” "๊ฒฝ๊ธฐ", "์ธ์ฒœ"์œผ๋กœ ํ•ด์„ํ•ฉ๋‹ˆ๋‹ค.
    - ์–ธ๊ธ‰๋œ ์ง€์—ญ์ด ์—†์œผ๋ฉด ๋นˆ ๋ฆฌ์ŠคํŠธ []๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
    """

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_query},
    ]

    try:
        # response_format={"type": "json_object"} makes the API guarantee
        # syntactically valid JSON in the reply, so json.loads below can
        # only fail on a truly malformed response.
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=messages,
            response_format={"type": "json_object"},
        )
        result = json.loads(response.choices[0].message.content)

        # Defensive validation: the prompt asks for {"regions": [...]},
        # but do not trust the model blindly — accept only a dict holding
        # a list, and keep only string entries so callers always get
        # list[str].
        regions = result.get("regions") if isinstance(result, dict) else None
        if isinstance(regions, list):
            return [r for r in regions if isinstance(r, str)]
        return []
    except Exception as e:
        # Best-effort: any API or parsing failure degrades to
        # "no regions found" rather than propagating upward.
        print(f"[LLM] ์ง€์—ญ๋ช… ์ถ”์ถœ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
        return []