openfree committed on
Commit
3116318
·
verified ·
1 Parent(s): 59be132

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -131
app.py CHANGED
@@ -15,10 +15,15 @@ import torch
15
  import numpy as np
16
  from loguru import logger
17
  from PIL import Image
18
- from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
19
  import time
20
  import warnings
21
  from typing import Dict, List, Optional, Union
 
 
 
 
 
 
22
 
23
  # CSV/TXT 분석
24
  import pandas as pd
@@ -27,7 +32,7 @@ import PyPDF2
27
 
28
  warnings.filterwarnings('ignore')
29
 
30
- print("🎮 로봇 시각 시스템 초기화 (Gemma3-R1984-4B)...")
31
 
32
  ##############################################################################
33
  # ์ƒ์ˆ˜ ์ •์˜
@@ -40,10 +45,9 @@ SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
40
  ##############################################################################
41
  # 전역 변수
42
  ##############################################################################
43
- model = None
44
- processor = None
45
  model_loaded = False
46
- model_name = "Gemma3-R1984-4B"
47
 
48
  ##############################################################################
49
  # 메모리 관리
@@ -85,8 +89,8 @@ def do_web_search(query: str) -> str:
85
  "domain": "google.com",
86
  "serp_type": "web",
87
  "device": "desktop",
88
- "lang": "ko", # ํ•œ๊ตญ์–ด ์šฐ์„ 
89
- "num": "10" # 10๊ฐœ๋กœ ์ œํ•œ
90
  }
91
 
92
  headers = {
@@ -190,29 +194,57 @@ def pdf_to_markdown(pdf_path: str) -> str:
190
 
191
  return f"**[PDF ํŒŒ์ผ: {os.path.basename(pdf_path)}]**\n\n{full_text}"
192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  ##############################################################################
194
  # ๋ชจ๋ธ ๋กœ๋“œ
195
  ##############################################################################
196
  @spaces.GPU(duration=120)
197
  def load_model():
198
- global model, processor, model_loaded
199
 
200
  if model_loaded:
201
  logger.info("๋ชจ๋ธ์ด ์ด๋ฏธ ๋กœ๋“œ๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค.")
202
  return True
203
 
204
  try:
205
- logger.info("Gemma3-R1984-4B ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘...")
206
  clear_cuda_cache()
207
 
208
- model_id = os.getenv("MODEL_ID", "VIDraft/Gemma-3-R1984-4B")
 
 
 
 
 
209
 
210
- processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
211
- model = Gemma3ForConditionalGeneration.from_pretrained(
212
- model_id,
213
- device_map="auto",
214
- torch_dtype=torch.bfloat16,
215
- attn_implementation="eager"
 
 
 
 
 
 
 
 
 
 
216
  )
217
 
218
  model_loaded = True
@@ -223,6 +255,38 @@ def load_model():
223
  logger.error(f"๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ: {e}")
224
  return False
225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  ##############################################################################
227
  # 이미지 분석 (로봇 태스크 중심)
228
  ##############################################################################
@@ -232,22 +296,21 @@ def analyze_image_for_robot(
232
  prompt: str,
233
  task_type: str = "general",
234
  use_web_search: bool = False,
235
- enable_thinking: bool = False, # ๊ธฐ๋ณธ๊ฐ’ False๋กœ ๋ณ€๊ฒฝ
236
- max_new_tokens: int = 300 # ์žฅ๋ฉด ์„ค๋ช…์„ ์œ„ํ•ด 300์œผ๋กœ ์ฆ๊ฐ€
237
  ) -> str:
238
  """๋กœ๋ด‡ ์ž‘์—…์„ ์œ„ํ•œ ์ด๋ฏธ์ง€ ๋ถ„์„"""
239
- global model, processor
240
 
241
  if not model_loaded:
242
  if not load_model():
243
  return "โŒ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ"
244
 
245
  try:
246
- # numpy ๋ฐฐ์—ด์„ PIL ์ด๋ฏธ์ง€๋กœ ๋ณ€ํ™˜
247
- if isinstance(image, np.ndarray):
248
- image = Image.fromarray(image).convert('RGB')
249
 
250
- # ํƒœ์Šคํฌ๋ณ„ ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ (๋” ๊ฐ„๊ฒฐํ•˜๊ฒŒ)
251
  system_prompts = {
252
  "general": "๋‹น์‹ ์€ ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ์ž…๋‹ˆ๋‹ค. ๋จผ์ € ์žฅ๋ฉด์„ 1-2์ค„๋กœ ์„ค๋ช…ํ•˜๊ณ , ํ•ต์‹ฌ ๋‚ด์šฉ์„ ๊ฐ„๊ฒฐํ•˜๊ฒŒ ๋ถ„์„ํ•˜์„ธ์š”.",
253
  "planning": """๋‹น์‹ ์€ ๋กœ๋ด‡ ์ž‘์—… ๊ณ„ํš AI์ž…๋‹ˆ๋‹ค.
@@ -281,64 +344,21 @@ Step_n: xxx""",
281
  combined_system = f"{search_results}\n\n{system_prompt}"
282
 
283
  # ๋ฉ”์‹œ์ง€ ๊ตฌ์„ฑ
284
- messages = [
285
- {
286
- "role": "system",
287
- "content": [{"type": "text", "text": combined_system}]
288
- },
289
- {
290
- "role": "user",
291
- "content": [
292
- {"type": "image", "url": image},
293
- {"type": "text", "text": prompt}
294
- ]
295
- }
296
- ]
297
-
298
- # ์ž…๋ ฅ ์ฒ˜๋ฆฌ
299
- inputs = processor.apply_chat_template(
300
- messages,
301
- add_generation_prompt=True,
302
- tokenize=True,
303
- return_dict=True,
304
- return_tensors="pt",
305
- ).to(device=model.device, dtype=torch.bfloat16)
306
-
307
- # ์ž…๋ ฅ ํ† ํฐ ์ˆ˜ ์ œํ•œ
308
- if inputs.input_ids.shape[1] > MAX_INPUT_LENGTH:
309
- inputs.input_ids = inputs.input_ids[:, -MAX_INPUT_LENGTH:]
310
- if 'attention_mask' in inputs:
311
- inputs.attention_mask = inputs.attention_mask[:, -MAX_INPUT_LENGTH:]
312
 
313
  # ์ƒ์„ฑ
314
- with torch.no_grad():
315
- outputs = model.generate(
316
- **inputs,
317
- max_new_tokens=max_new_tokens,
318
- do_sample=True,
319
- temperature=0.7,
320
- top_p=0.9,
321
- pad_token_id=processor.tokenizer.pad_token_id,
322
- eos_token_id=processor.tokenizer.eos_token_id,
323
- )
324
-
325
- # ์ž…๋ ฅ ํ† ํฐ ์ œ๊ฑฐํ•˜์—ฌ ์ถœ๋ ฅ๋งŒ ์ถ”์ถœ
326
- generated_tokens = outputs[0][inputs.input_ids.shape[1]:]
327
-
328
- # ๋””์ฝ”๋”ฉ
329
- response = processor.decode(generated_tokens, skip_special_tokens=True).strip()
330
-
331
- # ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ ๋ฐ ์ •๋ฆฌ
332
- # ์ด๋ฏธ ์ž…๋ ฅ ํ† ํฐ์„ ์ œ๊ฑฐํ–ˆ์œผ๋ฏ€๋กœ ์ถ”๊ฐ€ ์ •๋ฆฌ๋งŒ ์ˆ˜ํ–‰
333
- response = response.strip()
334
 
335
- # ํ˜น์‹œ ๋‚จ์•„์žˆ๋Š” ๋ถˆํ•„์š”ํ•œ ํ…์ŠคํŠธ ์ œ๊ฑฐ
336
- if response.startswith("model\n"):
337
- response = response[6:].strip()
338
- elif response.startswith("model"):
339
- response = response[5:].strip()
340
 
341
- return response
342
 
343
  except Exception as e:
344
  logger.error(f"์ด๋ฏธ์ง€ ๋ถ„์„ ์˜ค๋ฅ˜: {e}")
@@ -350,16 +370,6 @@ Step_n: xxx""",
350
  ##############################################################################
351
  # 문서 분석 (스트리밍)
352
  ##############################################################################
353
- def _model_gen_with_oom_catch(**kwargs):
354
- """OOM ์ฒ˜๋ฆฌ๋ฅผ ์œ„ํ•œ ์ƒ์„ฑ ํ•จ์ˆ˜"""
355
- global model
356
- try:
357
- model.generate(**kwargs)
358
- except torch.cuda.OutOfMemoryError:
359
- raise RuntimeError("GPU ๋ฉ”๋ชจ๋ฆฌ ๋ถ€์กฑ. Max Tokens๋ฅผ ์ค„์—ฌ์ฃผ์„ธ์š”.")
360
- finally:
361
- clear_cuda_cache()
362
-
363
  @spaces.GPU(duration=120)
364
  def analyze_documents_streaming(
365
  files: List[str],
@@ -368,7 +378,7 @@ def analyze_documents_streaming(
368
  max_new_tokens: int = 2048
369
  ) -> Iterator[str]:
370
  """๋ฌธ์„œ ๋ถ„์„ (์ŠคํŠธ๋ฆฌ๋ฐ)"""
371
- global model, processor
372
 
373
  if not model_loaded:
374
  if not load_model():
@@ -399,48 +409,32 @@ def analyze_documents_streaming(
399
  continue
400
  doc_contents.append(content)
401
 
 
 
 
402
  # ๋ฉ”์‹œ์ง€ ๊ตฌ์„ฑ
403
  messages = [
404
- {
405
- "role": "system",
406
- "content": [{"type": "text", "text": system_content}]
407
- },
408
- {
409
- "role": "user",
410
- "content": [
411
- {"type": "text", "text": "\n\n".join(doc_contents) + f"\n\n{prompt}"}
412
- ]
413
- }
414
  ]
415
 
416
- # ์ž…๋ ฅ ์ฒ˜๋ฆฌ
417
- inputs = processor.apply_chat_template(
418
- messages,
419
- add_generation_prompt=True,
420
- tokenize=True,
421
- return_dict=True,
422
- return_tensors="pt",
423
- ).to(device=model.device, dtype=torch.bfloat16)
424
-
425
- # ์ŠคํŠธ๋ฆฌ๋ฐ ์„ค์ •
426
- streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
427
- gen_kwargs = dict(
428
- inputs,
429
- streamer=streamer,
430
- max_new_tokens=max_new_tokens,
431
  temperature=0.8,
432
  top_p=0.9,
 
433
  )
434
 
435
- # ๋ณ„๋„ ์Šค๋ ˆ๋“œ์—์„œ ์ƒ์„ฑ
436
- t = Thread(target=_model_gen_with_oom_catch, kwargs=gen_kwargs)
437
- t.start()
438
-
439
  # ์ŠคํŠธ๋ฆฌ๋ฐ ์ถœ๋ ฅ
440
  output = ""
441
- for new_text in streamer:
442
- output += new_text
443
- yield output
 
 
 
444
 
445
  except Exception as e:
446
  logger.error(f"๋ฌธ์„œ ๋ถ„์„ ์˜ค๋ฅ˜: {e}")
@@ -494,17 +488,30 @@ css = """
494
  background: #e8f5e9;
495
  color: #2e7d32;
496
  }
 
 
 
 
 
 
 
 
497
  """
498
 
499
- with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B)", css=css) as demo:
500
  gr.HTML("""
501
  <div class="robot-header">
502
  <h1>๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ</h1>
503
- <h3>๐ŸŽฎ Gemma3-R1984-4B + ๐Ÿ“ท ์‹ค์‹œ๊ฐ„ ์›น์บ  + ๐Ÿ” ์›น ๊ฒ€์ƒ‰</h3>
504
- <p>โšก ์ตœ์‹  ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ AI๋กœ ๋กœ๋ด‡ ์ž‘์—… ๋ถ„์„ ๋ฐ ๊ณ„ํš ์ˆ˜๋ฆฝ!</p>
505
  </div>
506
  """)
507
 
 
 
 
 
 
508
 
509
  with gr.Row():
510
  # ์™ผ์ชฝ: ์›น์บ  ๋ฐ ์ž…๋ ฅ
@@ -575,15 +582,15 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B)", css=css) as dem
575
 
576
  enable_thinking = gr.Checkbox(
577
  label="๐Ÿค” ์ถ”๋ก  ๊ณผ์ • ํ‘œ์‹œ",
578
- value=False, # ๊ธฐ๋ณธ๊ฐ’ False๋กœ ๋ณ€๊ฒฝ
579
  info="Chain-of-Thought ์ถ”๋ก  ๊ณผ์ •์„ ๋ณด์—ฌ์ค๋‹ˆ๋‹ค"
580
  )
581
 
582
  max_tokens = gr.Slider(
583
  label="์ตœ๋Œ€ ํ† ํฐ ์ˆ˜",
584
  minimum=100,
585
- maximum=4096,
586
- value=300, # ์žฅ๋ฉด ์„ค๋ช…์„ ์œ„ํ•ด 300์œผ๋กœ ์ฆ๊ฐ€
587
  step=50
588
  )
589
 
@@ -600,8 +607,8 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B)", css=css) as dem
600
  '<div class="status-box" style="background:#d4edda; color:#155724;">๐ŸŽฎ ์‹œ์Šคํ…œ ์ค€๋น„ ์™„๋ฃŒ</div>'
601
  )
602
 
603
- # ๋ฌธ์„œ ๋ถ„์„ ํƒญ (์ˆจ๊น€ ์ฒ˜๋ฆฌ)
604
- with gr.Tab("๐Ÿ“„ ๋ฌธ์„œ ๋ถ„์„", visible=False): # visible=False๋กœ ์ˆจ๊น€
605
  with gr.Row():
606
  with gr.Column():
607
  doc_files = gr.File(
@@ -661,7 +668,7 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B)", css=css) as dem
661
  max_new_tokens=tokens
662
  )
663
 
664
- # ๊ฒฐ๊ณผ ํฌ๋งทํŒ… (๋” ๊ฐ„๊ฒฐํ•˜๊ฒŒ)
665
  timestamp = time.strftime("%H:%M:%S")
666
  task_names = {
667
  "planning": "์ž‘์—… ๊ณ„ํš",
@@ -776,7 +783,7 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B)", css=css) as dem
776
  )
777
 
778
  # ์ž๋™ ์บก์ฒ˜ ํƒ€์ด๋จธ (10์ดˆ๋งˆ๋‹ค)
779
- timer = gr.Timer(10.0, active=False) # 10์ดˆ ํƒ€์ด๋จธ, ์ดˆ๊ธฐ์—๋Š” ๋น„ํ™œ์„ฑํ™”
780
 
781
  # ์ž๋™ ์บก์ฒ˜ ํ† ๊ธ€ ์ด๋ฒคํŠธ
782
  def toggle_auto_capture(enabled):
@@ -809,7 +816,7 @@ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B)", css=css) as dem
809
  )
810
 
811
  if __name__ == "__main__":
812
- print("🚀 로봇 시각 시스템 시작 (Gemma3-R1984-4B)...")
813
  demo.launch(
814
  server_name="0.0.0.0",
815
  server_port=7860,
 
15
  import numpy as np
16
  from loguru import logger
17
  from PIL import Image
 
18
  import time
19
  import warnings
20
  from typing import Dict, List, Optional, Union
21
+ import base64
22
+ from io import BytesIO
23
+
24
+ # llama-cpp-python for GGUF
25
+ from llama_cpp import Llama
26
+ from llama_cpp.llama_chat_format import Llava16ChatHandler
27
 
28
  # CSV/TXT 분석
29
  import pandas as pd
 
32
 
33
  warnings.filterwarnings('ignore')
34
 
35
+ print("🎮 로봇 시각 시스템 초기화 (Gemma3-R1984-4B GGUF Q4_K_M)...")
36
 
37
  ##############################################################################
38
  # ์ƒ์ˆ˜ ์ •์˜
 
45
  ##############################################################################
46
  # 전역 변수
47
  ##############################################################################
48
+ llm = None
 
49
  model_loaded = False
50
+ model_name = "Gemma3-R1984-4B-Q4_K_M"
51
 
52
  ##############################################################################
53
  # 메모리 관리
 
89
  "domain": "google.com",
90
  "serp_type": "web",
91
  "device": "desktop",
92
+ "lang": "ko",
93
+ "num": "10"
94
  }
95
 
96
  headers = {
 
194
 
195
  return f"**[PDF ํŒŒ์ผ: {os.path.basename(pdf_path)}]**\n\n{full_text}"
196
 
197
+ ##############################################################################
198
+ # 이미지를 base64로 변환
199
+ ##############################################################################
200
+ def image_to_base64_data_uri(image: Union[np.ndarray, Image.Image]) -> str:
201
+ """์ด๋ฏธ์ง€๋ฅผ base64 data URI๋กœ ๋ณ€ํ™˜"""
202
+ if isinstance(image, np.ndarray):
203
+ image = Image.fromarray(image).convert('RGB')
204
+
205
+ buffered = BytesIO()
206
+ image.save(buffered, format="JPEG", quality=85)
207
+ img_str = base64.b64encode(buffered.getvalue()).decode()
208
+ return f"data:image/jpeg;base64,{img_str}"
209
+
210
  ##############################################################################
211
  # ๋ชจ๋ธ ๋กœ๋“œ
212
  ##############################################################################
213
  @spaces.GPU(duration=120)
214
  def load_model():
215
+ global llm, model_loaded
216
 
217
  if model_loaded:
218
  logger.info("๋ชจ๋ธ์ด ์ด๋ฏธ ๋กœ๋“œ๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค.")
219
  return True
220
 
221
  try:
222
+ logger.info("Gemma3-R1984-4B GGUF Q4_K_M ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘...")
223
  clear_cuda_cache()
224
 
225
+ # ๋ชจ๋ธ ๊ฒฝ๋กœ ์„ค์ •
226
+ model_path = os.getenv("MODEL_PATH", "VIDraft/Gemma-3-R1984-4B-GGUF/Gemma-3-R1984-4B.Q4_K_M.gguf")
227
+ mmproj_path = os.getenv("MMPROJ_PATH", "VIDraft/Gemma-3-R1984-4B-GGUF/Gemma-3-R1984-4B.mmproj-Q8_0.gguf")
228
+
229
+ # GPU ์‚ฌ์šฉ ๊ฐ€๋Šฅ ์—ฌ๋ถ€ ํ™•์ธ
230
+ n_gpu_layers = -1 if torch.cuda.is_available() else 0
231
 
232
+ # ์ฑ„ํŒ… ํ•ธ๋“ค๋Ÿฌ ์ƒ์„ฑ (๋น„์ „ ์ง€์›)
233
+ chat_handler = Llava16ChatHandler(
234
+ clip_model_path=mmproj_path,
235
+ verbose=False
236
+ )
237
+
238
+ # ๋ชจ๋ธ ๋กœ๋“œ
239
+ llm = Llama(
240
+ model_path=model_path,
241
+ chat_handler=chat_handler,
242
+ n_ctx=4096, # ์ปจํ…์ŠคํŠธ ํฌ๊ธฐ
243
+ n_gpu_layers=n_gpu_layers, # GPU ๋ ˆ์ด์–ด
244
+ n_threads=8, # CPU ์Šค๋ ˆ๋“œ
245
+ verbose=False,
246
+ seed=42,
247
+ logits_all=True, # ๋น„์ „ ๋ชจ๋ธ์— ํ•„์š”
248
  )
249
 
250
  model_loaded = True
 
255
  logger.error(f"๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ: {e}")
256
  return False
257
 
258
+ ##############################################################################
259
+ # 채팅 템플릿 포맷팅
260
+ ##############################################################################
261
+ def format_chat_prompt(system_prompt: str, user_prompt: str, image_uri: Optional[str] = None) -> List[Dict]:
262
+ """Gemma ์Šคํƒ€์ผ ์ฑ„ํŒ… ํ”„๋กฌํ”„ํŠธ ์ƒ์„ฑ"""
263
+ messages = []
264
+
265
+ # ์‹œ์Šคํ…œ ๋ฉ”์‹œ์ง€
266
+ messages.append({
267
+ "role": "system",
268
+ "content": system_prompt
269
+ })
270
+
271
+ # ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€
272
+ user_content = []
273
+ if image_uri:
274
+ user_content.append({
275
+ "type": "image_url",
276
+ "image_url": {"url": image_uri}
277
+ })
278
+ user_content.append({
279
+ "type": "text",
280
+ "text": user_prompt
281
+ })
282
+
283
+ messages.append({
284
+ "role": "user",
285
+ "content": user_content
286
+ })
287
+
288
+ return messages
289
+
290
  ##############################################################################
291
  # 이미지 분석 (로봇 태스크 중심)
292
  ##############################################################################
 
296
  prompt: str,
297
  task_type: str = "general",
298
  use_web_search: bool = False,
299
+ enable_thinking: bool = False,
300
+ max_new_tokens: int = 300
301
  ) -> str:
302
  """๋กœ๋ด‡ ์ž‘์—…์„ ์œ„ํ•œ ์ด๋ฏธ์ง€ ๋ถ„์„"""
303
+ global llm
304
 
305
  if not model_loaded:
306
  if not load_model():
307
  return "โŒ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ"
308
 
309
  try:
310
+ # ์ด๋ฏธ์ง€๋ฅผ base64๋กœ ๋ณ€ํ™˜
311
+ image_uri = image_to_base64_data_uri(image)
 
312
 
313
+ # ํƒœ์Šคํฌ๋ณ„ ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ
314
  system_prompts = {
315
  "general": "๋‹น์‹ ์€ ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ์ž…๋‹ˆ๋‹ค. ๋จผ์ € ์žฅ๋ฉด์„ 1-2์ค„๋กœ ์„ค๋ช…ํ•˜๊ณ , ํ•ต์‹ฌ ๋‚ด์šฉ์„ ๊ฐ„๊ฒฐํ•˜๊ฒŒ ๋ถ„์„ํ•˜์„ธ์š”.",
316
  "planning": """๋‹น์‹ ์€ ๋กœ๋ด‡ ์ž‘์—… ๊ณ„ํš AI์ž…๋‹ˆ๋‹ค.
 
344
  combined_system = f"{search_results}\n\n{system_prompt}"
345
 
346
  # ๋ฉ”์‹œ์ง€ ๊ตฌ์„ฑ
347
+ messages = format_chat_prompt(combined_system, prompt, image_uri)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
 
349
  # ์ƒ์„ฑ
350
+ response = llm.create_chat_completion(
351
+ messages=messages,
352
+ max_tokens=max_new_tokens,
353
+ temperature=0.7,
354
+ top_p=0.9,
355
+ stream=False
356
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
357
 
358
+ # ์‘๋‹ต ์ถ”์ถœ
359
+ result = response['choices'][0]['message']['content'].strip()
 
 
 
360
 
361
+ return result
362
 
363
  except Exception as e:
364
  logger.error(f"์ด๋ฏธ์ง€ ๋ถ„์„ ์˜ค๋ฅ˜: {e}")
 
370
  ##############################################################################
371
  # 문서 분석 (스트리밍)
372
  ##############################################################################
 
 
 
 
 
 
 
 
 
 
373
  @spaces.GPU(duration=120)
374
  def analyze_documents_streaming(
375
  files: List[str],
 
378
  max_new_tokens: int = 2048
379
  ) -> Iterator[str]:
380
  """๋ฌธ์„œ ๋ถ„์„ (์ŠคํŠธ๋ฆฌ๋ฐ)"""
381
+ global llm
382
 
383
  if not model_loaded:
384
  if not load_model():
 
409
  continue
410
  doc_contents.append(content)
411
 
412
+ # ์ „์ฒด ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ
413
+ full_prompt = "\n\n".join(doc_contents) + f"\n\n{prompt}"
414
+
415
  # ๋ฉ”์‹œ์ง€ ๊ตฌ์„ฑ
416
  messages = [
417
+ {"role": "system", "content": system_content},
418
+ {"role": "user", "content": full_prompt}
 
 
 
 
 
 
 
 
419
  ]
420
 
421
+ # ์ŠคํŠธ๋ฆฌ๋ฐ ์ƒ์„ฑ
422
+ stream = llm.create_chat_completion(
423
+ messages=messages,
424
+ max_tokens=max_new_tokens,
 
 
 
 
 
 
 
 
 
 
 
425
  temperature=0.8,
426
  top_p=0.9,
427
+ stream=True
428
  )
429
 
 
 
 
 
430
  # ์ŠคํŠธ๋ฆฌ๋ฐ ์ถœ๋ ฅ
431
  output = ""
432
+ for chunk in stream:
433
+ if 'choices' in chunk and len(chunk['choices']) > 0:
434
+ delta = chunk['choices'][0].get('delta', {})
435
+ if 'content' in delta:
436
+ output += delta['content']
437
+ yield output
438
 
439
  except Exception as e:
440
  logger.error(f"๋ฌธ์„œ ๋ถ„์„ ์˜ค๋ฅ˜: {e}")
 
488
  background: #e8f5e9;
489
  color: #2e7d32;
490
  }
491
+ .model-info {
492
+ background: #fff3cd;
493
+ color: #856404;
494
+ padding: 10px;
495
+ border-radius: 5px;
496
+ margin: 10px 0;
497
+ text-align: center;
498
+ }
499
  """
500
 
501
+ with gr.Blocks(title="๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ (Gemma3-4B GGUF)", css=css) as demo:
502
  gr.HTML("""
503
  <div class="robot-header">
504
  <h1>๐Ÿค– ๋กœ๋ด‡ ์‹œ๊ฐ ์‹œ์Šคํ…œ</h1>
505
+ <h3>๐ŸŽฎ Gemma3-R1984-4B GGUF Q4_K_M + ๐Ÿ“ท ์‹ค์‹œ๊ฐ„ ์›น์บ  + ๐Ÿ” ์›น ๊ฒ€์ƒ‰</h3>
506
+ <p>โšก ์–‘์žํ™” ๋ชจ๋ธ๋กœ ๋” ๋น ๋ฅด๊ณ  ํšจ์œจ์ ์ธ ๋กœ๋ด‡ ์ž‘์—… ๋ถ„์„!</p>
507
  </div>
508
  """)
509
 
510
+ gr.HTML("""
511
+ <div class="model-info">
512
+ <strong>๋ชจ๋ธ:</strong> Gemma3-R1984-4B Q4_K_M (2.49GB) | <strong>๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ:</strong> ~3-4GB VRAM
513
+ </div>
514
+ """)
515
 
516
  with gr.Row():
517
  # ์™ผ์ชฝ: ์›น์บ  ๋ฐ ์ž…๋ ฅ
 
582
 
583
  enable_thinking = gr.Checkbox(
584
  label="๐Ÿค” ์ถ”๋ก  ๊ณผ์ • ํ‘œ์‹œ",
585
+ value=False,
586
  info="Chain-of-Thought ์ถ”๋ก  ๊ณผ์ •์„ ๋ณด์—ฌ์ค๋‹ˆ๋‹ค"
587
  )
588
 
589
  max_tokens = gr.Slider(
590
  label="์ตœ๋Œ€ ํ† ํฐ ์ˆ˜",
591
  minimum=100,
592
+ maximum=2048,
593
+ value=300,
594
  step=50
595
  )
596
 
 
607
  '<div class="status-box" style="background:#d4edda; color:#155724;">๐ŸŽฎ ์‹œ์Šคํ…œ ์ค€๋น„ ์™„๋ฃŒ</div>'
608
  )
609
 
610
+ # ๋ฌธ์„œ ๋ถ„์„ ํƒญ
611
+ with gr.Tab("๐Ÿ“„ ๋ฌธ์„œ ๋ถ„์„", visible=False):
612
  with gr.Row():
613
  with gr.Column():
614
  doc_files = gr.File(
 
668
  max_new_tokens=tokens
669
  )
670
 
671
+ # ๊ฒฐ๊ณผ ํฌ๋งทํŒ…
672
  timestamp = time.strftime("%H:%M:%S")
673
  task_names = {
674
  "planning": "์ž‘์—… ๊ณ„ํš",
 
783
  )
784
 
785
  # ์ž๋™ ์บก์ฒ˜ ํƒ€์ด๋จธ (10์ดˆ๋งˆ๋‹ค)
786
+ timer = gr.Timer(10.0, active=False)
787
 
788
  # ์ž๋™ ์บก์ฒ˜ ํ† ๊ธ€ ์ด๋ฒคํŠธ
789
  def toggle_auto_capture(enabled):
 
816
  )
817
 
818
  if __name__ == "__main__":
819
+ print("🚀 로봇 시각 시스템 시작 (Gemma3-R1984-4B GGUF Q4_K_M)...")
820
  demo.launch(
821
  server_name="0.0.0.0",
822
  server_port=7860,