ZORYE committed on
Commit
f00ed24
·
verified ·
1 Parent(s): 2901036

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -19
app.py CHANGED
@@ -45,37 +45,40 @@ def add_watermark(image, text="gooditem gooditem gooditem gooditem gooditem"):
45
  combined = Image.alpha_composite(image.convert('RGBA'), watermark)
46
  return combined
47
 
48
- # 프로그램/아이템 정보 블록 추출
49
  def extract_info_block(article_text):
50
  pattern = r'(KBS2.*?<모델명>.*?)(?:\n\n|$)'
51
  match = re.search(pattern, article_text, re.DOTALL)
 
52
  if match:
53
  return match.group(1).strip()
54
  else:
55
  return None
56
 
57
- # 브랜드/모델명 여러 개 추출
 
58
  def extract_product_info(article_text):
59
  brand_match = re.search(r'<브랜드>[ \t]*([^\n]+)', article_text)
60
- model_matches = re.findall(r'<모델명>[ \t]*([^\n]+)', article_text)
61
-
62
  brand = brand_match.group(1).strip() if brand_match else None
63
- models = [m.strip() for m in model_matches] if model_matches else []
 
64
 
65
  result = []
66
 
67
- if brand and models:
68
  brand_words = re.findall(r'[A-Za-z]+', brand)
69
- selected_brand = ' '.join(brand_words[:2]) # 앞 2단어만
70
 
71
- for model in models:
 
72
  search_query = f"{selected_brand} {model}"
73
  search_url = f"https://www.coupang.com/np/search?component=&q={search_query.replace(' ', '+')}"
74
  result.append((model, search_url))
75
-
76
  return result
77
 
78
- # 메인 처리 함수
 
79
  def process_url(tistory_url, output_dir):
80
  result = []
81
  try:
@@ -85,6 +88,7 @@ def process_url(tistory_url, output_dir):
85
  img_tags = soup.find_all('img')
86
  img_urls = [img['src'] for img in img_tags if 'src' in img.attrs]
87
 
 
88
  for idx, img_url in enumerate(img_urls):
89
  base_name = os.path.basename(img_url).split('?')[0]
90
  if base_name.startswith('new_ico'):
@@ -104,31 +108,33 @@ def process_url(tistory_url, output_dir):
104
  name, ext = os.path.splitext(base_name)
105
  save_path = os.path.join(output_dir, f"{name}_{idx+1}_processed.png")
106
  final_img.save(save_path)
 
107
 
108
- # 다운로드 링크 추가
109
- relative_path = save_path.replace("./", "/")
110
- result.append(f"🖼️ [이미지 다운로드]({relative_path})")
111
-
112
  article_text = soup.get_text()
113
 
 
114
  info_block = extract_info_block(article_text)
115
- product_infos = extract_product_info(article_text)
116
 
 
 
 
 
117
  if info_block:
118
  result.append("\n===== 프로그램 정보 블록 =====")
119
  result.append(info_block)
120
  result.append("================================\n")
121
 
122
- if product_infos:
123
- for model_name, search_url in product_infos:
124
- result.append(f"🛍️ 아이템 : {model_name}")
125
- result.append(f"🔗 쿠팡 링크: {search_url}")
126
 
127
  except Exception as e:
128
  result.append(f"❌ URL 처리 실패: {tistory_url} / 에러: {e}")
129
 
130
  return result
131
 
 
132
  # 여러 URL 처리
133
  def process_multiple_urls(urls_text, output_dir):
134
  urls = [url.strip() for url in urls_text.strip().splitlines() if url.strip()]
 
45
  combined = Image.alpha_composite(image.convert('RGBA'), watermark)
46
  return combined
47
 
48
+
49
  def extract_info_block(article_text):
50
  pattern = r'(KBS2.*?<모델명>.*?)(?:\n\n|$)'
51
  match = re.search(pattern, article_text, re.DOTALL)
52
+
53
  if match:
54
  return match.group(1).strip()
55
  else:
56
  return None
57
 
58
+ # 제품명 추출 함수 수정 (여러 모델명 처리)
59
+
60
  def extract_product_info(article_text):
61
  brand_match = re.search(r'<브랜드>[ \t]*([^\n]+)', article_text)
 
 
62
  brand = brand_match.group(1).strip() if brand_match else None
63
+
64
+ model_matches = re.findall(r'<모델명>[ \t]*([^\n]+)', article_text)
65
 
66
  result = []
67
 
68
+ if brand and model_matches:
69
  brand_words = re.findall(r'[A-Za-z]+', brand)
70
+ selected_brand = ' '.join(brand_words[:2])
71
 
72
+ for model in model_matches:
73
+ model = model.strip()
74
  search_query = f"{selected_brand} {model}"
75
  search_url = f"https://www.coupang.com/np/search?component=&q={search_query.replace(' ', '+')}"
76
  result.append((model, search_url))
77
+
78
  return result
79
 
80
+ # process_url 함수 수정 (모델명 여러개 대응)
81
+
82
  def process_url(tistory_url, output_dir):
83
  result = []
84
  try:
 
88
  img_tags = soup.find_all('img')
89
  img_urls = [img['src'] for img in img_tags if 'src' in img.attrs]
90
 
91
+ # 이미지 처리
92
  for idx, img_url in enumerate(img_urls):
93
  base_name = os.path.basename(img_url).split('?')[0]
94
  if base_name.startswith('new_ico'):
 
108
  name, ext = os.path.splitext(base_name)
109
  save_path = os.path.join(output_dir, f"{name}_{idx+1}_processed.png")
110
  final_img.save(save_path)
111
+ result.append(f"✔️ 저장 완료: {save_path}")
112
 
113
+ # 텍스트 추출
 
 
 
114
  article_text = soup.get_text()
115
 
116
+ # 프로그램 정보 블록 추출
117
  info_block = extract_info_block(article_text)
 
118
 
119
+ # 제품 정보 추출 (여러 모델명 처리)
120
+ item_info_list = extract_product_info(article_text)
121
+
122
+ # 정리된 출력 포맷
123
  if info_block:
124
  result.append("\n===== 프로그램 정보 블록 =====")
125
  result.append(info_block)
126
  result.append("================================\n")
127
 
128
+ for model_name, search_url in item_info_list:
129
+ result.append(f"🛍️ 아이템 : {model_name}")
130
+ result.append(f"🔗 쿠팡 링크: {search_url}")
 
131
 
132
  except Exception as e:
133
  result.append(f"❌ URL 처리 실패: {tistory_url} / 에러: {e}")
134
 
135
  return result
136
 
137
+
138
  # 여러 URL 처리
139
  def process_multiple_urls(urls_text, output_dir):
140
  urls = [url.strip() for url in urls_text.strip().splitlines() if url.strip()]