gbrabbit committed on
Commit
0553b33
·
1 Parent(s): 5e29010

Auto commit at 07-2025-08 1:02:24

Browse files
Files changed (2) hide show
  1. app.py +318 -107
  2. requirements.txt +1 -0
app.py CHANGED
@@ -5,6 +5,10 @@ import json
5
  import traceback
6
  from transformers import AutoTokenizer
7
  import torch
 
 
 
 
8
 
9
  # .env ํŒŒ์ผ์—์„œ ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ๋กœ๋“œ
10
  try:
@@ -48,17 +52,38 @@ try:
48
 
49
  print(" ์ปค์Šคํ…€ ๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘...")
50
  # ์ปค์Šคํ…€ ๋ชจ๋ธ ํด๋ž˜์Šค import (Space ํด๋”์˜ modeling.py ์‚ฌ์šฉ)
51
- from modeling import KananaVForConditionalGeneration
 
 
 
 
 
52
 
53
- model = KananaVForConditionalGeneration.from_pretrained(
54
- MODEL_NAME,
55
- token=HF_TOKEN,
56
- torch_dtype=torch.float16,
57
- trust_remote_code=True,
58
- device_map=None,
59
- low_cpu_mem_usage=True
60
- )
61
- print(" โœ… ์ปค์Šคํ…€ ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  else:
63
  print(" โš ๏ธ ํ† ํฐ์ด ์—†์–ด์„œ ๊ณต๊ฐœ ๋ชจ๋ธ ์‚ฌ์šฉ")
64
  MODEL_NAME = "microsoft/DialoGPT-medium"
@@ -88,16 +113,84 @@ print(f"\n3. ์ตœ์ข… ์ƒํƒœ:")
88
  print(f" MODEL_LOADED: {MODEL_LOADED}")
89
  print(f" ์ตœ์ข… ๋ชจ๋ธ๋ช…: {MODEL_NAME}")
90
 
91
- def chat_with_model(message, history, image=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  if not MODEL_LOADED:
 
93
  return "โŒ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
 
94
  try:
95
- inputs = tokenizer(message, return_tensors="pt")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
 
 
 
 
 
 
97
  with torch.no_grad():
98
- if image is not None:
 
99
  # ์ด๋ฏธ์ง€๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ
100
- from PIL import Image
101
  import torchvision.transforms as transforms
102
 
103
  # ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ
@@ -107,85 +200,157 @@ def chat_with_model(message, history, image=None):
107
  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
108
  ])
109
 
110
- if isinstance(image, str):
111
- pil_image = Image.open(image).convert('RGB')
112
- else:
113
- pil_image = image.convert('RGB')
114
-
115
  pixel_values = transform(pil_image).unsqueeze(0)
116
  image_metas = {"vision_grid_thw": torch.tensor([[1, 14, 14]])} # ๊ธฐ๋ณธ ๊ทธ๋ฆฌ๋“œ ํฌ๊ธฐ
117
 
 
 
 
118
  # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ์˜ forward ๋ฉ”์„œ๋“œ ์‚ฌ์šฉ
119
- outputs = model(
120
- input_ids=inputs["input_ids"],
121
- attention_mask=inputs["attention_mask"],
122
- pixel_values=[pixel_values],
123
- image_metas=image_metas,
124
- max_new_tokens=200,
125
- temperature=0.7,
126
- do_sample=True,
127
- pad_token_id=tokenizer.eos_token_id
128
- )
 
 
 
 
 
 
 
129
  else:
130
- # ์ด๋ฏธ์ง€๊ฐ€ ์—†๋Š” ๊ฒฝ์šฐ ํ…์ŠคํŠธ๋งŒ ์ƒ์„ฑ
131
- outputs = model(
132
- input_ids=inputs["input_ids"],
133
- attention_mask=inputs["attention_mask"],
134
- max_new_tokens=200,
135
- temperature=0.7,
136
- do_sample=True,
137
- pad_token_id=tokenizer.eos_token_id
138
- )
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  # outputs๊ฐ€ ํŠœํ”Œ์ธ ๊ฒฝ์šฐ ์ฒซ ๋ฒˆ์งธ ์š”์†Œ ์‚ฌ์šฉ
141
  if isinstance(outputs, tuple):
 
142
  logits = outputs[0]
 
143
  else:
144
- logits = outputs.logits if hasattr(outputs, 'logits') else outputs
 
 
 
 
 
 
145
 
 
146
  # ๊ฐ€์žฅ ๋†’์€ ํ™•๋ฅ ์˜ ํ† ํฐ ์„ ํƒ
147
  next_token = torch.argmax(logits[:, -1, :], dim=-1)
148
  generated_tokens = [next_token]
 
149
 
150
  # ์ถ”๊ฐ€ ํ† ํฐ ์ƒ์„ฑ
151
- for _ in range(199): # max_new_tokens - 1
 
 
 
 
152
  inputs["input_ids"] = torch.cat([inputs["input_ids"], next_token.unsqueeze(-1)], dim=-1)
153
  inputs["attention_mask"] = torch.cat([inputs["attention_mask"], torch.ones_like(next_token.unsqueeze(-1))], dim=-1)
154
 
155
  with torch.no_grad():
156
- outputs = model(**inputs)
157
- if isinstance(outputs, tuple):
158
- logits = outputs[0]
159
- else:
160
- logits = outputs.logits if hasattr(outputs, 'logits') else outputs
161
-
162
- next_token = torch.argmax(logits[:, -1, :], dim=-1)
163
- generated_tokens.append(next_token)
164
-
165
- if next_token.item() == tokenizer.eos_token_id:
166
- break
 
 
 
 
 
167
 
 
168
  # ์ƒ์„ฑ๋œ ํ† ํฐ๋“ค์„ ๋””์ฝ”๋”ฉ
169
  generated_ids = torch.cat(generated_tokens, dim=0)
170
  response = tokenizer.decode(generated_ids, skip_special_tokens=True)
 
 
 
 
 
171
 
172
- if message in response:
173
- response = response.replace(message, "").strip()
174
  return response if response else "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์‘๋‹ต์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
175
  except Exception as e:
 
 
 
 
176
  return f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
177
 
178
- def solve_math_problem(problem, image=None):
 
 
 
 
 
179
  if not MODEL_LOADED:
 
180
  return "โŒ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
 
181
  try:
182
- prompt = f"๋‹ค์Œ ์ˆ˜ํ•™ ๋ฌธ์ œ๋ฅผ ๋‹จ๊ณ„๋ณ„๋กœ ํ’€์–ด์ฃผ์„ธ์š”: {problem}"
183
- inputs = tokenizer(prompt, return_tensors="pt")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
 
185
  with torch.no_grad():
186
- if image is not None:
 
187
  # ์ด๋ฏธ์ง€๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ
188
- from PIL import Image
189
  import torchvision.transforms as transforms
190
 
191
  # ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ
@@ -195,72 +360,118 @@ def solve_math_problem(problem, image=None):
195
  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
196
  ])
197
 
198
- if isinstance(image, str):
199
- pil_image = Image.open(image).convert('RGB')
200
- else:
201
- pil_image = image.convert('RGB')
202
-
203
  pixel_values = transform(pil_image).unsqueeze(0)
204
  image_metas = {"vision_grid_thw": torch.tensor([[1, 14, 14]])} # ๊ธฐ๋ณธ ๊ทธ๋ฆฌ๋“œ ํฌ๊ธฐ
205
 
 
 
 
206
  # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ์˜ forward ๋ฉ”์„œ๋“œ ์‚ฌ์šฉ
207
- outputs = model(
208
- input_ids=inputs["input_ids"],
209
- attention_mask=inputs["attention_mask"],
210
- pixel_values=[pixel_values],
211
- image_metas=image_metas,
212
- max_new_tokens=300,
213
- temperature=0.3,
214
- do_sample=True,
215
- pad_token_id=tokenizer.eos_token_id
216
- )
 
 
 
 
 
 
 
217
  else:
218
- # ์ด๋ฏธ์ง€๊ฐ€ ์—†๋Š” ๊ฒฝ์šฐ ํ…์ŠคํŠธ๋งŒ ์ƒ์„ฑ
219
- outputs = model(
220
- input_ids=inputs["input_ids"],
221
- attention_mask=inputs["attention_mask"],
222
- max_new_tokens=300,
223
- temperature=0.3,
224
- do_sample=True,
225
- pad_token_id=tokenizer.eos_token_id
226
- )
 
 
 
 
 
 
 
 
 
 
 
 
227
 
228
  # outputs๊ฐ€ ํŠœํ”Œ์ธ ๊ฒฝ์šฐ ์ฒซ ๋ฒˆ์งธ ์š”์†Œ ์‚ฌ์šฉ
229
  if isinstance(outputs, tuple):
 
230
  logits = outputs[0]
 
231
  else:
232
- logits = outputs.logits if hasattr(outputs, 'logits') else outputs
 
 
 
 
 
 
233
 
 
234
  # ๊ฐ€์žฅ ๋†’์€ ํ™•๋ฅ ์˜ ํ† ํฐ ์„ ํƒ
235
  next_token = torch.argmax(logits[:, -1, :], dim=-1)
236
  generated_tokens = [next_token]
 
237
 
238
  # ์ถ”๊ฐ€ ํ† ํฐ ์ƒ์„ฑ
239
- for _ in range(299): # max_new_tokens - 1
 
 
 
 
240
  inputs["input_ids"] = torch.cat([inputs["input_ids"], next_token.unsqueeze(-1)], dim=-1)
241
  inputs["attention_mask"] = torch.cat([inputs["attention_mask"], torch.ones_like(next_token.unsqueeze(-1))], dim=-1)
242
 
243
  with torch.no_grad():
244
- outputs = model(**inputs)
245
- if isinstance(outputs, tuple):
246
- logits = outputs[0]
247
- else:
248
- logits = outputs.logits if hasattr(outputs, 'logits') else outputs
249
-
250
- next_token = torch.argmax(logits[:, -1, :], dim=-1)
251
- generated_tokens.append(next_token)
252
-
253
- if next_token.item() == tokenizer.eos_token_id:
254
- break
 
 
 
 
 
255
 
 
256
  # ์ƒ์„ฑ๋œ ํ† ํฐ๋“ค์„ ๋””์ฝ”๋”ฉ
257
  generated_ids = torch.cat(generated_tokens, dim=0)
258
  response = tokenizer.decode(generated_ids, skip_special_tokens=True)
 
 
 
 
 
259
 
260
- if prompt in response:
261
- response = response.replace(prompt, "").strip()
262
  return response if response else "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์ˆ˜ํ•™ ๋ฌธ์ œ๋ฅผ ํ’€ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
263
  except Exception as e:
 
 
 
 
264
  return f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
265
 
266
  with gr.Blocks(title="Lily Math RAG System", theme=gr.themes.Soft()) as demo:
@@ -274,16 +485,16 @@ with gr.Blocks(title="Lily Math RAG System", theme=gr.themes.Soft()) as demo:
274
  msg = gr.Textbox(label="๋ฉ”์‹œ์ง€๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", placeholder="์•ˆ๋…•ํ•˜์„ธ์š”! ์ˆ˜ํ•™ ๋ฌธ์ œ๋ฅผ ๋„์™€์ฃผ์„ธ์š”.", lines=2)
275
  clear = gr.Button("๋Œ€ํ™” ์ดˆ๊ธฐํ™”")
276
  with gr.Column(scale=1):
277
- gr.Markdown("### ๐Ÿ“ท ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ")
278
- image_input = gr.Image(label="์ด๋ฏธ์ง€ (์„ ํƒ์‚ฌํ•ญ)", type="pil")
279
- gr.Markdown("์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋Œ€ํ™”๊ฐ€ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค.")
280
 
281
- def respond(message, chat_history, image):
282
- bot_message = chat_with_model(message, chat_history, image)
283
  chat_history.append({"role": "user", "content": message})
284
  chat_history.append({"role": "assistant", "content": bot_message})
285
  return "", chat_history
286
- msg.submit(respond, [msg, chatbot, image_input], [msg, chatbot])
287
  clear.click(lambda: None, None, chatbot, queue=False)
288
 
289
  with gr.Tab("๐Ÿงฎ ์ˆ˜ํ•™ ๋ฌธ์ œ ํ•ด๊ฒฐ"):
@@ -292,12 +503,12 @@ with gr.Blocks(title="Lily Math RAG System", theme=gr.themes.Soft()) as demo:
292
  math_input = gr.Textbox(label="์ˆ˜ํ•™ ๋ฌธ์ œ", placeholder="์˜ˆ: 2x + 5 = 13", lines=3)
293
  solve_btn = gr.Button("๋ฌธ์ œ ํ’€๊ธฐ", variant="primary")
294
  with gr.Column(scale=1):
295
- gr.Markdown("### ๐Ÿ“ท ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ")
296
- math_image_input = gr.Image(label="์ˆ˜ํ•™ ๋ฌธ์ œ ์ด๋ฏธ์ง€ (์„ ํƒ์‚ฌํ•ญ)", type="pil")
297
- gr.Markdown("์ˆ˜ํ•™ ๋ฌธ์ œ ์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด ๋” ์ •ํ™•ํ•œ ๋‹ต๋ณ€์„ ๋ฐ›์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.")
298
  with gr.Column(scale=2):
299
  math_output = gr.Textbox(label="ํ•ด๋‹ต", lines=8, interactive=False)
300
- solve_btn.click(solve_math_problem, [math_input, math_image_input], math_output)
301
 
302
  with gr.Tab("โš™๏ธ ์„ค์ •"):
303
  gr.Markdown("## ์‹œ์Šคํ…œ ์ •๋ณด")
 
5
  import traceback
6
  from transformers import AutoTokenizer
7
  import torch
8
+ import fitz # PyMuPDF
9
+ from PIL import Image
10
+ import io
11
+ import base64
12
 
13
  # .env ํŒŒ์ผ์—์„œ ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ๋กœ๋“œ
14
  try:
 
52
 
53
  print(" ์ปค์Šคํ…€ ๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘...")
54
  # ์ปค์Šคํ…€ ๋ชจ๋ธ ํด๋ž˜์Šค import (Space ํด๋”์˜ modeling.py ์‚ฌ์šฉ)
55
+ try:
56
+ from modeling import KananaVForConditionalGeneration
57
+ print(" โœ… modeling.py import ์„ฑ๊ณต")
58
+ except Exception as import_error:
59
+ print(f" โŒ modeling.py import ์‹คํŒจ: {import_error}")
60
+ raise import_error
61
 
62
+ try:
63
+ print(f" ๋ชจ๋ธ ๋กœ๋”ฉ ํŒŒ๋ผ๋ฏธํ„ฐ:")
64
+ print(f" MODEL_NAME: {MODEL_NAME}")
65
+ print(f" torch_dtype: {torch.float16}")
66
+ print(f" trust_remote_code: True")
67
+ print(f" device_map: None")
68
+ print(f" low_cpu_mem_usage: True")
69
+
70
+ model = KananaVForConditionalGeneration.from_pretrained(
71
+ MODEL_NAME,
72
+ token=HF_TOKEN,
73
+ torch_dtype=torch.float16,
74
+ trust_remote_code=True,
75
+ device_map=None,
76
+ low_cpu_mem_usage=True
77
+ )
78
+ print(" โœ… ์ปค์Šคํ…€ ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ")
79
+ print(f" ๋ชจ๋ธ ํƒ€์ž…: {type(model)}")
80
+ print(f" ๋ชจ๋ธ ๋””๋ฐ”์ด์Šค: {next(model.parameters()).device}")
81
+ except Exception as model_error:
82
+ print(f" โŒ ์ปค์Šคํ…€ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ: {model_error}")
83
+ print(f" ์˜ค๋ฅ˜ ํƒ€์ž…: {type(model_error).__name__}")
84
+ import traceback
85
+ traceback.print_exc()
86
+ raise model_error
87
  else:
88
  print(" โš ๏ธ ํ† ํฐ์ด ์—†์–ด์„œ ๊ณต๊ฐœ ๋ชจ๋ธ ์‚ฌ์šฉ")
89
  MODEL_NAME = "microsoft/DialoGPT-medium"
 
113
  print(f" MODEL_LOADED: {MODEL_LOADED}")
114
  print(f" ์ตœ์ข… ๋ชจ๋ธ๋ช…: {MODEL_NAME}")
115
 
116
+ def extract_text_from_pdf(pdf_file):
117
+ """PDF์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ"""
118
+ try:
119
+ doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
120
+ text = ""
121
+ for page in doc:
122
+ text += page.get_text()
123
+ doc.close()
124
+ return text
125
+ except Exception as e:
126
+ return f"PDF ์ฝ๊ธฐ ์˜ค๋ฅ˜: {str(e)}"
127
+
128
+ def extract_text_from_image(image_file):
129
+ """์ด๋ฏธ์ง€์—์„œ OCR๋กœ ํ…์ŠคํŠธ ์ถ”์ถœ"""
130
+ try:
131
+ # PIL๋กœ ์ด๋ฏธ์ง€ ์—ด๊ธฐ
132
+ image = Image.open(image_file)
133
+
134
+ # ๊ฐ„๋‹จํ•œ OCR (์‹ค์ œ๋กœ๋Š” ๋” ์ •๊ตํ•œ OCR ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ์‚ฌ์šฉ ํ•„์š”)
135
+ # ์—ฌ๊ธฐ์„œ๋Š” ์ด๋ฏธ์ง€ ์ •๋ณด๋งŒ ๋ฐ˜ํ™˜
136
+ return f"์ด๋ฏธ์ง€ ํŒŒ์ผ: {image.size[0]}x{image.size[1]} ํ”ฝ์…€"
137
+ except Exception as e:
138
+ return f"์ด๋ฏธ์ง€ ์ฝ๊ธฐ ์˜ค๋ฅ˜: {str(e)}"
139
+
140
+ def process_uploaded_file(file):
141
+ """์—…๋กœ๋“œ๋œ ํŒŒ์ผ ์ฒ˜๋ฆฌ"""
142
+ if file is None:
143
+ return None, None
144
+
145
+ file_path = file.name
146
+ file_extension = file_path.lower().split('.')[-1]
147
+
148
+ if file_extension == 'pdf':
149
+ text_content = extract_text_from_pdf(file)
150
+ return text_content, None
151
+ elif file_extension in ['png', 'jpg', 'jpeg']:
152
+ text_content = extract_text_from_image(file)
153
+ return text_content, file
154
+ else:
155
+ return f"์ง€์›ํ•˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹: {file_extension}", None
156
+
157
+ def chat_with_model(message, history, file=None):
158
+ print(f"๐Ÿ” DEBUG: chat_with_model ์‹œ์ž‘")
159
+ print(f" ๋ฉ”์‹œ์ง€: {message}")
160
+ print(f" ํŒŒ์ผ: {file}")
161
+ print(f" MODEL_LOADED: {MODEL_LOADED}")
162
+
163
  if not MODEL_LOADED:
164
+ print("โŒ DEBUG: ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์Œ")
165
  return "โŒ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
166
+
167
  try:
168
+ print("๐Ÿ“ DEBUG: ํŒŒ์ผ ์ฒ˜๋ฆฌ ์‹œ์ž‘")
169
+ # ํŒŒ์ผ ์ฒ˜๋ฆฌ
170
+ file_content = ""
171
+ image_file = None
172
+ if file is not None:
173
+ print(f" ํŒŒ์ผ๋ช…: {file.name}")
174
+ text_content, image_file = process_uploaded_file(file)
175
+ print(f" ํ…์ŠคํŠธ ๋‚ด์šฉ: {text_content[:100] if text_content else 'None'}...")
176
+ print(f" ์ด๋ฏธ์ง€ ํŒŒ์ผ: {image_file}")
177
+ if text_content:
178
+ file_content = f"\n[์—…๋กœ๋“œ๋œ ํŒŒ์ผ ๋‚ด์šฉ]\n{text_content}\n"
179
+
180
+ # ๋ฉ”์‹œ์ง€์— ํŒŒ์ผ ๋‚ด์šฉ ์ถ”๊ฐ€
181
+ full_message = message + file_content
182
+ print(f"๐Ÿ“ DEBUG: ์ „์ฒด ๋ฉ”์‹œ์ง€: {full_message[:200]}...")
183
 
184
+ print("๐Ÿ”ค DEBUG: ํ† ํฌ๋‚˜์ด์ € ์ฒ˜๋ฆฌ ์‹œ์ž‘")
185
+ inputs = tokenizer(full_message, return_tensors="pt")
186
+ print(f" ์ž…๋ ฅ shape: {inputs['input_ids'].shape}")
187
+ print(f" attention_mask shape: {inputs['attention_mask'].shape}")
188
+
189
+ print("๐Ÿค– DEBUG: ๋ชจ๋ธ ์ถ”๋ก  ์‹œ์ž‘")
190
  with torch.no_grad():
191
+ if image_file is not None:
192
+ print("๐Ÿ–ผ๏ธ DEBUG: ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ๋ชจ๋“œ")
193
  # ์ด๋ฏธ์ง€๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ
 
194
  import torchvision.transforms as transforms
195
 
196
  # ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ
 
200
  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
201
  ])
202
 
203
+ pil_image = Image.open(image_file).convert('RGB')
 
 
 
 
204
  pixel_values = transform(pil_image).unsqueeze(0)
205
  image_metas = {"vision_grid_thw": torch.tensor([[1, 14, 14]])} # ๊ธฐ๋ณธ ๊ทธ๋ฆฌ๋“œ ํฌ๊ธฐ
206
 
207
+ print(f" ์ด๋ฏธ์ง€ shape: {pixel_values.shape}")
208
+ print(f" ์ด๋ฏธ์ง€ ๋ฉ”ํƒ€: {image_metas}")
209
+
210
  # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ์˜ forward ๋ฉ”์„œ๋“œ ์‚ฌ์šฉ
211
+ print("๐Ÿ”„ DEBUG: ๋ชจ๋ธ ํ˜ธ์ถœ (๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ)")
212
+ try:
213
+ outputs = model(
214
+ input_ids=inputs["input_ids"],
215
+ attention_mask=inputs["attention_mask"],
216
+ pixel_values=[pixel_values],
217
+ image_metas=image_metas,
218
+ max_new_tokens=200,
219
+ temperature=0.7,
220
+ do_sample=True,
221
+ pad_token_id=tokenizer.eos_token_id
222
+ )
223
+ print("โœ… DEBUG: ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ ํ˜ธ์ถœ ์„ฑ๊ณต")
224
+ except Exception as model_error:
225
+ print(f"โŒ DEBUG: ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ ํ˜ธ์ถœ ์‹คํŒจ: {model_error}")
226
+ print(f" ์˜ค๋ฅ˜ ํƒ€์ž…: {type(model_error).__name__}")
227
+ raise model_error
228
  else:
229
+ print("๐Ÿ“„ DEBUG: ํ…์ŠคํŠธ๋งŒ ์ฒ˜๋ฆฌ ๋ชจ๋“œ")
230
+ # ํ…์ŠคํŠธ๋งŒ ์ƒ์„ฑ
231
+ print("๐Ÿ”„ DEBUG: ๋ชจ๋ธ ํ˜ธ์ถœ (ํ…์ŠคํŠธ๋งŒ)")
232
+ try:
233
+ outputs = model(
234
+ input_ids=inputs["input_ids"],
235
+ attention_mask=inputs["attention_mask"],
236
+ max_new_tokens=200,
237
+ temperature=0.7,
238
+ do_sample=True,
239
+ pad_token_id=tokenizer.eos_token_id
240
+ )
241
+ print("โœ… DEBUG: ํ…์ŠคํŠธ ๋ชจ๋ธ ํ˜ธ์ถœ ์„ฑ๊ณต")
242
+ except Exception as model_error:
243
+ print(f"โŒ DEBUG: ํ…์ŠคํŠธ ๋ชจ๋ธ ํ˜ธ์ถœ ์‹คํŒจ: {model_error}")
244
+ print(f" ์˜ค๋ฅ˜ ํƒ€์ž…: {type(model_error).__name__}")
245
+ raise model_error
246
+
247
+ print("๐Ÿ” DEBUG: ์ถœ๋ ฅ ์ฒ˜๋ฆฌ ์‹œ์ž‘")
248
+ print(f" outputs ํƒ€์ž…: {type(outputs)}")
249
+ print(f" outputs ๋‚ด์šฉ: {outputs}")
250
 
251
  # outputs๊ฐ€ ํŠœํ”Œ์ธ ๊ฒฝ์šฐ ์ฒซ ๋ฒˆ์งธ ์š”์†Œ ์‚ฌ์šฉ
252
  if isinstance(outputs, tuple):
253
+ print("๐Ÿ“ฆ DEBUG: outputs๊ฐ€ ํŠœํ”Œ์ž„")
254
  logits = outputs[0]
255
+ print(f" logits shape: {logits.shape}")
256
  else:
257
+ print("๐Ÿ“ฆ DEBUG: outputs๊ฐ€ ๊ฐ์ฒด์ž„")
258
+ if hasattr(outputs, 'logits'):
259
+ logits = outputs.logits
260
+ print(f" logits shape: {logits.shape}")
261
+ else:
262
+ logits = outputs
263
+ print(f" outputs shape: {logits.shape}")
264
 
265
+ print("๐ŸŽฏ DEBUG: ํ† ํฐ ์ƒ์„ฑ ์‹œ์ž‘")
266
  # ๊ฐ€์žฅ ๋†’์€ ํ™•๋ฅ ์˜ ํ† ํฐ ์„ ํƒ
267
  next_token = torch.argmax(logits[:, -1, :], dim=-1)
268
  generated_tokens = [next_token]
269
+ print(f" ์ฒซ ๋ฒˆ์งธ ํ† ํฐ: {next_token.item()}")
270
 
271
  # ์ถ”๊ฐ€ ํ† ํฐ ์ƒ์„ฑ
272
+ print("๐Ÿ”„ DEBUG: ๋ฐ˜๋ณต ํ† ํฐ ์ƒ์„ฑ ์‹œ์ž‘")
273
+ for i in range(199): # max_new_tokens - 1
274
+ if i % 50 == 0:
275
+ print(f" ์ง„ํ–‰๋ฅ : {i}/199")
276
+
277
  inputs["input_ids"] = torch.cat([inputs["input_ids"], next_token.unsqueeze(-1)], dim=-1)
278
  inputs["attention_mask"] = torch.cat([inputs["attention_mask"], torch.ones_like(next_token.unsqueeze(-1))], dim=-1)
279
 
280
  with torch.no_grad():
281
+ try:
282
+ outputs = model(**inputs)
283
+ if isinstance(outputs, tuple):
284
+ logits = outputs[0]
285
+ else:
286
+ logits = outputs.logits if hasattr(outputs, 'logits') else outputs
287
+
288
+ next_token = torch.argmax(logits[:, -1, :], dim=-1)
289
+ generated_tokens.append(next_token)
290
+
291
+ if next_token.item() == tokenizer.eos_token_id:
292
+ print(f" EOS ํ† ํฐ ๋ฐœ๊ฒฌ: {i}๋ฒˆ์งธ")
293
+ break
294
+ except Exception as loop_error:
295
+ print(f"โŒ DEBUG: ํ† ํฐ ์ƒ์„ฑ ๋ฃจํ”„ ์˜ค๋ฅ˜ (i={i}): {loop_error}")
296
+ raise loop_error
297
 
298
+ print("๐Ÿ”ค DEBUG: ํ† ํฐ ๋””์ฝ”๋”ฉ ์‹œ์ž‘")
299
  # ์ƒ์„ฑ๋œ ํ† ํฐ๋“ค์„ ๋””์ฝ”๋”ฉ
300
  generated_ids = torch.cat(generated_tokens, dim=0)
301
  response = tokenizer.decode(generated_ids, skip_special_tokens=True)
302
+ print(f" ์›๋ณธ ์‘๋‹ต: {response[:200]}...")
303
+
304
+ if full_message in response:
305
+ response = response.replace(full_message, "").strip()
306
+ print(f" ์ •๋ฆฌ๋œ ์‘๋‹ต: {response[:200]}...")
307
 
308
+ print("โœ… DEBUG: chat_with_model ์™„๋ฃŒ")
 
309
  return response if response else "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์‘๋‹ต์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
310
  except Exception as e:
311
+ print(f"โŒ DEBUG: chat_with_model ์ „์ฒด ์˜ค๋ฅ˜: {e}")
312
+ print(f" ์˜ค๋ฅ˜ ํƒ€์ž…: {type(e).__name__}")
313
+ import traceback
314
+ traceback.print_exc()
315
  return f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
316
 
317
+ def solve_math_problem(problem, file=None):
318
+ print(f"๐Ÿ” DEBUG: solve_math_problem ์‹œ์ž‘")
319
+ print(f" ๋ฌธ์ œ: {problem}")
320
+ print(f" ํŒŒ์ผ: {file}")
321
+ print(f" MODEL_LOADED: {MODEL_LOADED}")
322
+
323
  if not MODEL_LOADED:
324
+ print("โŒ DEBUG: ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์Œ")
325
  return "โŒ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
326
+
327
  try:
328
+ print("๐Ÿ“ DEBUG: ํŒŒ์ผ ์ฒ˜๋ฆฌ ์‹œ์ž‘")
329
+ # ํŒŒ์ผ ์ฒ˜๋ฆฌ
330
+ file_content = ""
331
+ image_file = None
332
+ if file is not None:
333
+ print(f" ํŒŒ์ผ๋ช…: {file.name}")
334
+ text_content, image_file = process_uploaded_file(file)
335
+ print(f" ํ…์ŠคํŠธ ๋‚ด์šฉ: {text_content[:100] if text_content else 'None'}...")
336
+ print(f" ์ด๋ฏธ์ง€ ํŒŒ์ผ: {image_file}")
337
+ if text_content:
338
+ file_content = f"\n[์—…๋กœ๋“œ๋œ ํŒŒ์ผ ๋‚ด์šฉ]\n{text_content}\n"
339
+
340
+ # ๋ฉ”์‹œ์ง€์— ํŒŒ์ผ ๋‚ด์šฉ ์ถ”๊ฐ€
341
+ full_prompt = f"๋‹ค์Œ ์ˆ˜ํ•™ ๋ฌธ์ œ๋ฅผ ๋‹จ๊ณ„๋ณ„๋กœ ํ’€์–ด์ฃผ์„ธ์š”: {problem}{file_content}"
342
+ print(f"๐Ÿ“ DEBUG: ์ „์ฒด ํ”„๋กฌํ”„ํŠธ: {full_prompt[:200]}...")
343
+
344
+ print("๐Ÿ”ค DEBUG: ํ† ํฌ๋‚˜์ด์ € ์ฒ˜๋ฆฌ ์‹œ์ž‘")
345
+ inputs = tokenizer(full_prompt, return_tensors="pt")
346
+ print(f" ์ž…๋ ฅ shape: {inputs['input_ids'].shape}")
347
+ print(f" attention_mask shape: {inputs['attention_mask'].shape}")
348
 
349
+ print("๐Ÿค– DEBUG: ๋ชจ๋ธ ์ถ”๋ก  ์‹œ์ž‘")
350
  with torch.no_grad():
351
+ if image_file is not None:
352
+ print("๐Ÿ–ผ๏ธ DEBUG: ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ๋ชจ๋“œ")
353
  # ์ด๋ฏธ์ง€๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ
 
354
  import torchvision.transforms as transforms
355
 
356
  # ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ
 
360
  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
361
  ])
362
 
363
+ pil_image = Image.open(image_file).convert('RGB')
 
 
 
 
364
  pixel_values = transform(pil_image).unsqueeze(0)
365
  image_metas = {"vision_grid_thw": torch.tensor([[1, 14, 14]])} # ๊ธฐ๋ณธ ๊ทธ๋ฆฌ๋“œ ํฌ๊ธฐ
366
 
367
+ print(f" ์ด๋ฏธ์ง€ shape: {pixel_values.shape}")
368
+ print(f" ์ด๋ฏธ์ง€ ๋ฉ”ํƒ€: {image_metas}")
369
+
370
  # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ์˜ forward ๋ฉ”์„œ๋“œ ์‚ฌ์šฉ
371
+ print("๐Ÿ”„ DEBUG: ๋ชจ๋ธ ํ˜ธ์ถœ (๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ)")
372
+ try:
373
+ outputs = model(
374
+ input_ids=inputs["input_ids"],
375
+ attention_mask=inputs["attention_mask"],
376
+ pixel_values=[pixel_values],
377
+ image_metas=image_metas,
378
+ max_new_tokens=300,
379
+ temperature=0.3,
380
+ do_sample=True,
381
+ pad_token_id=tokenizer.eos_token_id
382
+ )
383
+ print("โœ… DEBUG: ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ ํ˜ธ์ถœ ์„ฑ๊ณต")
384
+ except Exception as model_error:
385
+ print(f"โŒ DEBUG: ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ ํ˜ธ์ถœ ์‹คํŒจ: {model_error}")
386
+ print(f" ์˜ค๋ฅ˜ ํƒ€์ž…: {type(model_error).__name__}")
387
+ raise model_error
388
  else:
389
+ print("๐Ÿ“„ DEBUG: ํ…์ŠคํŠธ๋งŒ ์ฒ˜๋ฆฌ ๋ชจ๋“œ")
390
+ # ํ…์ŠคํŠธ๋งŒ ์ƒ์„ฑ
391
+ print("๐Ÿ”„ DEBUG: ๋ชจ๋ธ ํ˜ธ์ถœ (ํ…์ŠคํŠธ๋งŒ)")
392
+ try:
393
+ outputs = model(
394
+ input_ids=inputs["input_ids"],
395
+ attention_mask=inputs["attention_mask"],
396
+ max_new_tokens=300,
397
+ temperature=0.3,
398
+ do_sample=True,
399
+ pad_token_id=tokenizer.eos_token_id
400
+ )
401
+ print("โœ… DEBUG: ํ…์ŠคํŠธ ๋ชจ๋ธ ํ˜ธ์ถœ ์„ฑ๊ณต")
402
+ except Exception as model_error:
403
+ print(f"โŒ DEBUG: ํ…์ŠคํŠธ ๋ชจ๋ธ ํ˜ธ์ถœ ์‹คํŒจ: {model_error}")
404
+ print(f" ์˜ค๋ฅ˜ ํƒ€์ž…: {type(model_error).__name__}")
405
+ raise model_error
406
+
407
+ print("๐Ÿ” DEBUG: ์ถœ๋ ฅ ์ฒ˜๋ฆฌ ์‹œ์ž‘")
408
+ print(f" outputs ํƒ€์ž…: {type(outputs)}")
409
+ print(f" outputs ๋‚ด์šฉ: {outputs}")
410
 
411
  # outputs๊ฐ€ ํŠœํ”Œ์ธ ๊ฒฝ์šฐ ์ฒซ ๋ฒˆ์งธ ์š”์†Œ ์‚ฌ์šฉ
412
  if isinstance(outputs, tuple):
413
+ print("๐Ÿ“ฆ DEBUG: outputs๊ฐ€ ํŠœํ”Œ์ž„")
414
  logits = outputs[0]
415
+ print(f" logits shape: {logits.shape}")
416
  else:
417
+ print("๐Ÿ“ฆ DEBUG: outputs๊ฐ€ ๊ฐ์ฒด์ž„")
418
+ if hasattr(outputs, 'logits'):
419
+ logits = outputs.logits
420
+ print(f" logits shape: {logits.shape}")
421
+ else:
422
+ logits = outputs
423
+ print(f" outputs shape: {logits.shape}")
424
 
425
+ print("๐ŸŽฏ DEBUG: ํ† ํฐ ์ƒ์„ฑ ์‹œ์ž‘")
426
  # ๊ฐ€์žฅ ๋†’์€ ํ™•๋ฅ ์˜ ํ† ํฐ ์„ ํƒ
427
  next_token = torch.argmax(logits[:, -1, :], dim=-1)
428
  generated_tokens = [next_token]
429
+ print(f" ์ฒซ ๋ฒˆ์งธ ํ† ํฐ: {next_token.item()}")
430
 
431
  # ์ถ”๊ฐ€ ํ† ํฐ ์ƒ์„ฑ
432
+ print("๐Ÿ”„ DEBUG: ๋ฐ˜๋ณต ํ† ํฐ ์ƒ์„ฑ ์‹œ์ž‘")
433
+ for i in range(299): # max_new_tokens - 1
434
+ if i % 50 == 0:
435
+ print(f" ์ง„ํ–‰๋ฅ : {i}/299")
436
+
437
  inputs["input_ids"] = torch.cat([inputs["input_ids"], next_token.unsqueeze(-1)], dim=-1)
438
  inputs["attention_mask"] = torch.cat([inputs["attention_mask"], torch.ones_like(next_token.unsqueeze(-1))], dim=-1)
439
 
440
  with torch.no_grad():
441
+ try:
442
+ outputs = model(**inputs)
443
+ if isinstance(outputs, tuple):
444
+ logits = outputs[0]
445
+ else:
446
+ logits = outputs.logits if hasattr(outputs, 'logits') else outputs
447
+
448
+ next_token = torch.argmax(logits[:, -1, :], dim=-1)
449
+ generated_tokens.append(next_token)
450
+
451
+ if next_token.item() == tokenizer.eos_token_id:
452
+ print(f" EOS ํ† ํฐ ๋ฐœ๊ฒฌ: {i}๋ฒˆ์งธ")
453
+ break
454
+ except Exception as loop_error:
455
+ print(f"โŒ DEBUG: ํ† ํฐ ์ƒ์„ฑ ๋ฃจํ”„ ์˜ค๋ฅ˜ (i={i}): {loop_error}")
456
+ raise loop_error
457
 
458
+ print("๐Ÿ”ค DEBUG: ํ† ํฐ ๋””์ฝ”๋”ฉ ์‹œ์ž‘")
459
  # ์ƒ์„ฑ๋œ ํ† ํฐ๋“ค์„ ๋””์ฝ”๋”ฉ
460
  generated_ids = torch.cat(generated_tokens, dim=0)
461
  response = tokenizer.decode(generated_ids, skip_special_tokens=True)
462
+ print(f" ์›๋ณธ ์‘๋‹ต: {response[:200]}...")
463
+
464
+ if full_prompt in response:
465
+ response = response.replace(full_prompt, "").strip()
466
+ print(f" ์ •๋ฆฌ๋œ ์‘๋‹ต: {response[:200]}...")
467
 
468
+ print("โœ… DEBUG: solve_math_problem ์™„๋ฃŒ")
 
469
  return response if response else "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์ˆ˜ํ•™ ๋ฌธ์ œ๋ฅผ ํ’€ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
470
  except Exception as e:
471
+ print(f"โŒ DEBUG: solve_math_problem ์ „์ฒด ์˜ค๋ฅ˜: {e}")
472
+ print(f" ์˜ค๋ฅ˜ ํƒ€์ž…: {type(e).__name__}")
473
+ import traceback
474
+ traceback.print_exc()
475
  return f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
476
 
477
  with gr.Blocks(title="Lily Math RAG System", theme=gr.themes.Soft()) as demo:
 
485
  msg = gr.Textbox(label="๋ฉ”์‹œ์ง€๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", placeholder="์•ˆ๋…•ํ•˜์„ธ์š”! ์ˆ˜ํ•™ ๋ฌธ์ œ๋ฅผ ๋„์™€์ฃผ์„ธ์š”.", lines=2)
486
  clear = gr.Button("๋Œ€ํ™” ์ดˆ๊ธฐํ™”")
487
  with gr.Column(scale=1):
488
+ gr.Markdown("### ๐Ÿ“ ํŒŒ์ผ ์—…๋กœ๋“œ")
489
+ file_input = gr.File(label="PDF/์ด๋ฏธ์ง€ ํŒŒ์ผ (์„ ํƒ์‚ฌํ•ญ)", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
490
+ gr.Markdown("PDF๋‚˜ ์ด๋ฏธ์ง€ ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜๋ฉด ๋ฌธ์„œ๋ฅผ ํ•ด์„ํ•˜์—ฌ ๋‹ต๋ณ€ํ•ฉ๋‹ˆ๋‹ค.")
491
 
492
+ def respond(message, chat_history, file):
493
+ bot_message = chat_with_model(message, chat_history, file)
494
  chat_history.append({"role": "user", "content": message})
495
  chat_history.append({"role": "assistant", "content": bot_message})
496
  return "", chat_history
497
+ msg.submit(respond, [msg, chatbot, file_input], [msg, chatbot])
498
  clear.click(lambda: None, None, chatbot, queue=False)
499
 
500
  with gr.Tab("๐Ÿงฎ ์ˆ˜ํ•™ ๋ฌธ์ œ ํ•ด๊ฒฐ"):
 
503
  math_input = gr.Textbox(label="์ˆ˜ํ•™ ๋ฌธ์ œ", placeholder="์˜ˆ: 2x + 5 = 13", lines=3)
504
  solve_btn = gr.Button("๋ฌธ์ œ ํ’€๊ธฐ", variant="primary")
505
  with gr.Column(scale=1):
506
+ gr.Markdown("### ๐Ÿ“ ํŒŒ์ผ ์—…๋กœ๋“œ")
507
+ math_file_input = gr.File(label="์ˆ˜ํ•™ ๋ฌธ์ œ ํŒŒ์ผ (์„ ํƒ์‚ฌํ•ญ)", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
508
+ gr.Markdown("์ˆ˜ํ•™ ๋ฌธ์ œ PDF๋‚˜ ์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด ๋” ์ •ํ™•ํ•œ ๋‹ต๋ณ€์„ ๋ฐ›์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.")
509
  with gr.Column(scale=2):
510
  math_output = gr.Textbox(label="ํ•ด๋‹ต", lines=8, interactive=False)
511
+ solve_btn.click(solve_math_problem, [math_input, math_file_input], math_output)
512
 
513
  with gr.Tab("โš™๏ธ ์„ค์ •"):
514
  gr.Markdown("## ์‹œ์Šคํ…œ ์ •๋ณด")
requirements.txt CHANGED
@@ -8,3 +8,4 @@ python-dotenv>=1.0.0
8
  Pillow>=9.0.0
9
  torchvision>=0.15.0
10
  accelerate==1.9.0
 
 
8
  Pillow>=9.0.0
9
  torchvision>=0.15.0
10
  accelerate==1.9.0
11
+ PyMuPDF>=1.23.0