unnastyle committed on
Commit
d44208f
·
verified ·
1 Parent(s): 670a9c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -231
app.py CHANGED
@@ -1,148 +1,53 @@
1
- import gradio as gr
2
- import requests
3
- import re
4
  import os
5
  import openai
6
 
7
- def debug_print(*args):
8
- print("[DEBUG]", *args)
9
-
10
- ########################################################
11
- # (1) 기존 함수 (parse_track_number, fetch_script 등)
12
- ########################################################
13
-
14
- def parse_track_number(input_text):
15
- debug_print("parse_track_number() called with input_text:", input_text)
16
- match = re.search(r'<track[^>]*\skind="captions"[^>]*\ssrc="([^"]+)"', input_text)
17
- if not match:
18
- debug_print("ํŠธ๋ž™ ํƒœ๊ทธ(kinds='captions')์—์„œ src ์ถ”์ถœ ์‹คํŒจ")
19
- return None
20
-
21
- track_number = match.group(1).strip()
22
- debug_print("์ดˆ๊ธฐ ์ถ”์ถœ๋œ track_number:", track_number)
23
-
24
- if 'blob:' in track_number:
25
- track_number = track_number.replace('blob:', '')
26
- debug_print("blob: ์ œ๊ฑฐ ํ›„ track_number:", track_number)
27
-
28
- if 'https://player.vimeo.com' in track_number:
29
- track_number = track_number.replace('https://player.vimeo.com', '')
30
- debug_print("๋„๋ฉ”์ธ ์ œ๊ฑฐ ํ›„ track_number:", track_number)
31
-
32
- track_number = track_number.strip()
33
- if not track_number.startswith('/'):
34
- track_number = '/' + track_number
35
- debug_print("์•ž์— / ์—†๋Š” ๊ฒฝ์šฐ ์ถ”๊ฐ€ ํ›„ track_number:", track_number)
36
-
37
- debug_print("์ตœ์ข… track_number:", track_number)
38
- return track_number
39
-
40
- def fetch_script(url):
41
- track_number = parse_track_number(url)
42
- if not track_number:
43
- return (
44
- "ํŠธ๋ž™๋ฒˆํ˜ธ๋ฅผ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค.\n"
45
- "์˜ˆ: <track kind='captions' src='/1234567' ...>"
46
- )
47
-
48
- target_url = f"https://player.vimeo.com{track_number}"
49
- debug_print("์ ‘์†ํ•  ํƒ€๊ฒŸ URL:", target_url)
50
-
51
- try:
52
- response = requests.get(target_url)
53
- if response.status_code == 200:
54
- debug_print("์Šคํฌ๋ฆฝํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ ์„ฑ๊ณต")
55
- return response.text
56
- else:
57
- debug_print("HTTP ์—๋Ÿฌ ๋ฐœ์ƒ:", response.status_code)
58
- return f"HTTP ์—๋Ÿฌ: {response.status_code}"
59
- except Exception as e:
60
- debug_print("์˜ˆ์™ธ ๋ฐœ์ƒ:", str(e))
61
- return f"์—๋Ÿฌ ๋ฐœ์ƒ: {str(e)}"
62
-
63
- def show_next_field(num_visible):
64
- debug_print("show_next_field() called with num_visible:", num_visible)
65
- if num_visible < 5:
66
- num_visible += 1
67
- debug_print("์ƒˆ๋กœ์šด num_visible ๊ฐ’:", num_visible)
68
-
69
- updates = []
70
- for i in range(1, 6):
71
- updates.append(gr.update(visible=(i <= num_visible)))
72
-
73
- return [num_visible] + updates
74
-
75
- ########################################################
76
- # (2) (기존) 전체 스크립트 로직
77
- ########################################################
78
-
79
- def is_vtt_timeline_line(line: str) -> bool:
80
- pattern = r'^\d{2}:\d{2}:\d{2}(\.\d+)?\s*-->\s*\d{2}:\d{2}:\d{2}(\.\d+)?$'
81
- return bool(re.match(pattern, line.strip()))
82
-
83
- def is_number_line(line: str) -> bool:
84
- pattern = r'^\d+[\.\)]?$'
85
- return bool(re.match(pattern, line.strip()))
86
-
87
- def is_webvtt_header(line: str) -> bool:
88
- return line.strip() == "WEBVTT"
89
-
90
- def combine_scripts(script1, script2, script3, script4, script5):
91
- scripts = [script1, script2, script3, script4, script5]
92
- full_merged = []
93
- filtered_merged = []
94
-
95
- for idx, sc in enumerate(scripts, start=1):
96
- if not sc or not sc.strip():
97
- continue
98
-
99
- lines = sc.splitlines()
100
- # (A) ์ „์ฒด ์Šคํฌ๋ฆฝํŠธ
101
- full_merged.append(f"[๊ฐ•์˜{idx}]")
102
- full_merged.extend(lines)
103
-
104
- # (B) ์ˆ˜์ •์‚ฌํ•ญ
105
- filtered_merged.append(f"[๊ฐ•์˜{idx}]")
106
- for line in lines:
107
- if is_vtt_timeline_line(line):
108
- continue
109
- if is_number_line(line):
110
- continue
111
- if is_webvtt_header(line):
112
- continue
113
- filtered_merged.append(line)
114
-
115
- old_combined_script = "\n".join(full_merged)
116
- old_changes_summary = "\n".join(filtered_merged)
117
- return old_combined_script, old_changes_summary
118
-
119
- ########################################################
120
- # (3) LLM์„ ์‚ฌ์šฉํ•œ ๋ฌธ์žฅ ๊ต์ •: ํ•œ ํ…์ŠคํŠธ๋กœ ํ•ฉ์ณ์„œ ๋ฐ˜ํ™˜
121
- ########################################################
122
-
123
- def refine_with_llm(old_changes_summary_text):
124
  """
125
- (๊ธฐ์กด) ์ˆ˜์ •์‚ฌํ•ญ์„ ๋ฐ›์•„ ๊ต์ • ๋ณธ๋ฌธ + ์ˆ˜์ •๊ต์ •์‚ฌํ•ญ์„
126
- ํ•˜๋‚˜์˜ ๋ฌธ์ž์—ด๋กœ ํ•ฉ์ณ์„œ ๋ฐ˜ํ™˜.
 
 
 
 
 
 
 
127
  """
128
  openai.api_key = os.getenv("OPENAI_API_KEY")
 
 
 
 
 
129
 
130
  system_prompt = (
131
- "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด ๊ต์ • ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค. "
132
- "์ ˆ๋Œ€ ์š”์•ฝ/์‚ญ์ œ ์—†์ด, ๋งž์ถค๋ฒ•/๋„์–ด์“ฐ๊ธฐ๋ฅผ ๊ต์ •ํ•ด ์ฃผ์„ธ์š”."
133
- "๊ต์ • ๋‚ด์—ญ๋„ ๋ณ„๋„๋กœ ํ‘œ์‹œํ•ด ์ฃผ์„ธ์š”."
 
 
 
 
 
 
 
 
134
  )
135
- user_prompt = f"```\n{old_changes_summary_text}\n```\n์œ„ ํ…์ŠคํŠธ๋ฅผ ๊ต์ •ํ•ด ์ฃผ์„ธ์š”"
136
 
137
- if not openai.api_key:
138
- # ํ•˜๋‚˜์˜ ํ…์ŠคํŠธ๋กœ ๋ฐ”๋กœ ํ•ฉ์ณ์„œ ๋ฐ˜ํ™˜
139
- return (
140
- "OpenAI API ํ‚ค๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.\n\n=== ์ˆ˜์ •ยท๊ต์ • ์‚ฌํ•ญ ===\n์—†์Œ"
141
- )
 
 
 
 
142
 
143
  try:
 
144
  response = openai.ChatCompletion.create(
145
- model="gpt-4o-mini", # ์‹ค์ œ ๋ชจ๋ธ ์กด์žฌ ์—ฌ๋ถ€ ํ™•์ธ ํ•„์š”
146
  messages=[
147
  {"role": "system", "content": system_prompt},
148
  {"role": "user", "content": user_prompt}
@@ -151,104 +56,18 @@ def refine_with_llm(old_changes_summary_text):
151
  )
152
  raw_output = response.choices[0].message["content"].strip()
153
  except Exception as e:
154
- return f"LLM ํ˜ธ์ถœ ์˜ค๋ฅ˜: {str(e)}\n\n=== ์ˆ˜์ •ยท๊ต์ • ์‚ฌํ•ญ ===\n์ƒ์„ฑ ์‹คํŒจ"
155
-
156
- # "=== ๊ต์ •๋œ ๋ณธ๋ฌธ ===" / "=== ์ˆ˜์ •ยท๊ต์ • ์‚ฌํ•ญ ===" ์œผ๋กœ ๊ตฌ๋ถ„ํ•œ๋‹ค๊ณ  ๊ฐ€์ •
157
- corrected_sep = "=== ๊ต์ •๋œ ๋ณธ๋ฌธ ==="
158
- changes_sep = "=== ์ˆ˜์ •ยท๊ต์ • ์‚ฌํ•ญ ==="
159
-
160
- corrected_text = ""
161
- changes_text = ""
162
-
163
- if corrected_sep in raw_output and changes_sep in raw_output:
164
- part1 = raw_output.split(corrected_sep, 1)[1]
165
- if changes_sep in part1:
166
- corrected_text, maybe_log = part1.split(changes_sep, 1)
167
- corrected_text = corrected_text.strip()
168
- changes_text = maybe_log.strip()
169
- else:
170
- corrected_text = part1.strip()
171
- changes_text = "์ˆ˜์ • ์‚ฌํ•ญ ๊ตฌ๋ถ„์ด ์—†์Šต๋‹ˆ๋‹ค."
172
- else:
173
- corrected_text = raw_output
174
- changes_text = "LLM ์‘๋‹ต์— ์ˆ˜์ • ์‚ฌํ•ญ ๊ตฌ๋ถ„์ด ์—†์Šต๋‹ˆ๋‹ค."
175
-
176
- # **๊ฐ•์˜๋‚ด์šฉ + ์ˆ˜์ •๊ต์ •์‚ฌํ•ญ**์„ ํ•œ ํ…์ŠคํŠธ๋กœ ํ•ฉ์ณ ๋ฐ˜ํ™˜
177
- final_output = f"{corrected_text}\n\n=== ์ˆ˜์ •ยท๊ต์ • ์‚ฌํ•ญ ===\n{changes_text}"
178
- return final_output
179
-
180
- ########################################################
181
- # (4) build_app
182
- ########################################################
183
-
184
- def build_app():
185
- with gr.Blocks() as demo:
186
- gr.Markdown("## ๊ฐ•์˜ URL ์ž…๋ ฅ๊ธฐ")
187
-
188
- num_visible = gr.State(value=2)
189
- outputs = []
190
- rows = []
191
-
192
- for i in range(1, 6):
193
- with gr.Row(visible=(i <= 2)) as row:
194
- with gr.Column():
195
- inp = gr.Textbox(
196
- label=f"๊ฐ•์˜{i} URL ์ž…๋ ฅ",
197
- placeholder="ex) <track kind='captions' src='/1234567.vtt' ...>"
198
- )
199
- btn = gr.Button("์Šคํฌ๋ฆฝํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ")
200
- out = gr.Textbox(
201
- label=f"๊ฐ•์˜{i} ์Šคํฌ๋ฆฝํŠธ",
202
- lines=8
203
- )
204
- btn.click(fetch_script, inputs=inp, outputs=out)
205
-
206
- rows.append(row)
207
- outputs.append(out)
208
-
209
- plus_btn = gr.Button("+")
210
- plus_btn.click(show_next_field, inputs=num_visible, outputs=[num_visible]+rows)
211
-
212
- # (๊ธฐ์กด) ์ „์ฒด ์Šคํฌ๋ฆฝํŠธ ๋งŒ๋“ค๊ธฐ
213
- with gr.Row():
214
- old_combine_button = gr.Button("(๊ธฐ์กด) ์ „์ฒด ์Šคํฌ๋ฆฝํŠธ ๋งŒ๋“ค๊ธฐ")
215
- with gr.Row():
216
- old_combined_box = gr.Textbox(
217
- label="(๊ธฐ์กด) ์ „์ฒด ์Šคํฌ๋ฆฝํŠธ",
218
- lines=10
219
- )
220
- old_changes_box = gr.Textbox(
221
- label="(๊ธฐ์กด) ์ˆ˜์ •์‚ฌํ•ญ",
222
- lines=8
223
- )
224
-
225
- old_combine_button.click(
226
- fn=combine_scripts,
227
- inputs=outputs,
228
- outputs=[old_combined_box, old_changes_box]
229
- )
230
-
231
- # (์‹ ๊ทœ) LLM (๊ต์ •๋œ ๋ณธ๋ฌธ + ์ˆ˜์ •๊ต์ •์‚ฌํ•ญ) -> ๋‹จ์ผ ํ…์ŠคํŠธ๋ฐ•์Šค
232
- with gr.Row():
233
- llm_button = gr.Button("LLM(gpt-4o-mini)๋กœ ๊ฐ•์˜๋‚ด์šฉ ์ •๋ฆฌ")
234
-
235
- with gr.Row():
236
- final_output_box = gr.Textbox(
237
- label="๊ฐ•์˜๋‚ด์šฉ (์ˆ˜์ •ยท๊ต์ • ์‚ฌํ•ญ ํฌํ•จ)",
238
- lines=15,
239
- placeholder="์ด๊ณณ์— ๊ต์ •๋œ ๋ณธ๋ฌธ + ์ˆ˜์ •๊ต์ •์‚ฌํ•ญ์ด ํ•จ๊ป˜ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค."
240
- )
241
-
242
- # LLM ํ˜ธ์ถœ -> ํ•˜๋‚˜์˜ ํ…์ŠคํŠธ๋กœ ํ•ฉ์ณ์„œ final_output_box์— ํ‘œ์‹œ
243
- llm_button.click(
244
- fn=refine_with_llm,
245
- inputs=old_changes_box,
246
- outputs=final_output_box
247
  )
248
 
249
- return demo
250
-
251
-
252
- if __name__ == "__main__":
253
- demo = build_app()
254
- demo.launch()
 
 
 
 
 
 
 
 
1
  import os
2
  import openai
3
 
4
+ def refine_with_llm_new(old_changes_summary_text: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  """
6
+ 1. (기존) 수정사항 텍스트를 입력받아,
7
+ 2. 줄바꿈/띄어쓰기를 문맥에 맞게 재구성,
8
+ 3. 내용은 절대 요약/삭제/임의 변경 금지,
9
+ 4. 오타·맞춤법만 수정, 그 수정 내역을 별도로 정리해 알려준다.
10
+
11
+ 출력:
12
+ (교정된 본문, 수정내역 정리)
13
+ - 교정된 본문(문맥상 문단 나누거나 붙이기, 맞춤법 수정 반영)
14
+ - 수정내역(이전 -> 이후, 모든 수정사항)
15
  """
16
  openai.api_key = os.getenv("OPENAI_API_KEY")
17
+ if not openai.api_key:
18
+ return (
19
+ "์˜ค๋ฅ˜: OPENAI_API_KEY๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.",
20
+ "์ˆ˜์ •๋‚ด์—ญ ์—†์Œ"
21
+ )
22
 
23
  system_prompt = (
24
+ "당신은 한국어 문장 교정 전문가입니다.\n"
25
+ "아래의 [규칙]을 지키면서 텍스트를 교정해 주세요.\n"
26
+ "[규칙]\n"
27
+ "1) (기존) 수정사항 텍스트를 우선 그대로 기억.\n"
28
+ "2) 줄바꿈, 띄어쓰기를 문맥에 맞게 재구성.\n"
29
+ " - 분리되어야 할 문단은 분리하고\n"
30
+ " - 붙여야 할 문단은 붙이되\n"
31
+ " - 내용은 절대 삭제하거나 요약하지 말 것.\n"
32
+ "3) 오타나 맞춤법이 안 맞는 것만 수정.\n"
33
+ " - 수정한 부분은 모두 별도로 정리.\n"
34
+ "4) 절대 임의로 내용을 바꾸지 않는다.\n"
35
  )
 
36
 
37
+ user_prompt = (
38
+ f"아래 텍스트를 [규칙]에 따라 교정해 주세요.\n"
39
+ f"```\n{old_changes_summary_text}\n```"
40
+ "\n\n"
41
+ "출력 형식:\n"
42
+ "교정된 본문(문맥상 문단 재구성 반영)\n"
43
+ "=== 수정내역 ===\n"
44
+ "모든 교정 사항(이전 -> 이후) 리스트\n"
45
+ )
46
 
47
  try:
48
+ # (예: 최신 openai 1.0.0+ 에서는 ChatCompletion 방식을 계속 지원)
49
  response = openai.ChatCompletion.create(
50
+ model="gpt-4o-mini", # 실제 모델 존재 여부는 별도 확인 필요
51
  messages=[
52
  {"role": "system", "content": system_prompt},
53
  {"role": "user", "content": user_prompt}
 
56
  )
57
  raw_output = response.choices[0].message["content"].strip()
58
  except Exception as e:
59
+ return (
60
+ f"LLM 호출 오류: {str(e)}",
61
+ "수정내역 정보 생성 실패"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  )
63
 
64
+ # 아래는 가정된 출력 형식:
65
+ # 교정된 본문
66
+ # === 수정내역 ===
67
+ # (수정 사항들)
68
+ if "=== 수정내역 ===" in raw_output:
69
+ corrected_text, changes_text = raw_output.split("=== 수정내역 ===", 1)
70
+ return corrected_text.strip(), changes_text.strip()
71
+ else:
72
+ # 수정내역 구분이 없으면, 통째로 본문으로 반환
73
+ return raw_output, "수정내역 구분 없음"