Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,10 +15,15 @@ import torch
|
|
| 15 |
import numpy as np
|
| 16 |
from loguru import logger
|
| 17 |
from PIL import Image
|
| 18 |
-
from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
|
| 19 |
import time
|
| 20 |
import warnings
|
| 21 |
from typing import Dict, List, Optional, Union
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
# CSV/TXT ๋ถ์
|
| 24 |
import pandas as pd
|
|
@@ -27,7 +32,7 @@ import PyPDF2
|
|
| 27 |
|
| 28 |
warnings.filterwarnings('ignore')
|
| 29 |
|
| 30 |
-
print("๐ฎ ๋ก๋ด ์๊ฐ ์์คํ
์ด๊ธฐํ (Gemma3-R1984-4B)...")
|
| 31 |
|
| 32 |
##############################################################################
|
| 33 |
# ์์ ์ ์
|
|
@@ -40,10 +45,9 @@ SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
|
|
| 40 |
##############################################################################
|
| 41 |
# ์ ์ญ ๋ณ์
|
| 42 |
##############################################################################
|
| 43 |
-
|
| 44 |
-
processor = None
|
| 45 |
model_loaded = False
|
| 46 |
-
model_name = "Gemma3-R1984-4B"
|
| 47 |
|
| 48 |
##############################################################################
|
| 49 |
# ๋ฉ๋ชจ๋ฆฌ ๊ด๋ฆฌ
|
|
@@ -85,8 +89,8 @@ def do_web_search(query: str) -> str:
|
|
| 85 |
"domain": "google.com",
|
| 86 |
"serp_type": "web",
|
| 87 |
"device": "desktop",
|
| 88 |
-
"lang": "ko",
|
| 89 |
-
"num": "10"
|
| 90 |
}
|
| 91 |
|
| 92 |
headers = {
|
|
@@ -190,29 +194,57 @@ def pdf_to_markdown(pdf_path: str) -> str:
|
|
| 190 |
|
| 191 |
return f"**[PDF ํ์ผ: {os.path.basename(pdf_path)}]**\n\n{full_text}"
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
##############################################################################
|
| 194 |
# ๋ชจ๋ธ ๋ก๋
|
| 195 |
##############################################################################
|
| 196 |
@spaces.GPU(duration=120)
|
| 197 |
def load_model():
|
| 198 |
-
global
|
| 199 |
|
| 200 |
if model_loaded:
|
| 201 |
logger.info("๋ชจ๋ธ์ด ์ด๋ฏธ ๋ก๋๋์ด ์์ต๋๋ค.")
|
| 202 |
return True
|
| 203 |
|
| 204 |
try:
|
| 205 |
-
logger.info("Gemma3-R1984-4B ๋ชจ๋ธ ๋ก๋ฉ ์์...")
|
| 206 |
clear_cuda_cache()
|
| 207 |
|
| 208 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
)
|
| 217 |
|
| 218 |
model_loaded = True
|
|
@@ -223,6 +255,38 @@ def load_model():
|
|
| 223 |
logger.error(f"๋ชจ๋ธ ๋ก๋ฉ ์คํจ: {e}")
|
| 224 |
return False
|
| 225 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
##############################################################################
|
| 227 |
# ์ด๋ฏธ์ง ๋ถ์ (๋ก๋ด ํ์คํฌ ์ค์ฌ)
|
| 228 |
##############################################################################
|
|
@@ -232,22 +296,21 @@ def analyze_image_for_robot(
|
|
| 232 |
prompt: str,
|
| 233 |
task_type: str = "general",
|
| 234 |
use_web_search: bool = False,
|
| 235 |
-
enable_thinking: bool = False,
|
| 236 |
-
max_new_tokens: int = 300
|
| 237 |
) -> str:
|
| 238 |
"""๋ก๋ด ์์
์ ์ํ ์ด๋ฏธ์ง ๋ถ์"""
|
| 239 |
-
global
|
| 240 |
|
| 241 |
if not model_loaded:
|
| 242 |
if not load_model():
|
| 243 |
return "โ ๋ชจ๋ธ ๋ก๋ฉ ์คํจ"
|
| 244 |
|
| 245 |
try:
|
| 246 |
-
#
|
| 247 |
-
|
| 248 |
-
image = Image.fromarray(image).convert('RGB')
|
| 249 |
|
| 250 |
-
# ํ์คํฌ๋ณ ์์คํ
ํ๋กฌํํธ ๊ตฌ์ฑ
|
| 251 |
system_prompts = {
|
| 252 |
"general": "๋น์ ์ ๋ก๋ด ์๊ฐ ์์คํ
์
๋๋ค. ๋จผ์ ์ฅ๋ฉด์ 1-2์ค๋ก ์ค๋ช
ํ๊ณ , ํต์ฌ ๋ด์ฉ์ ๊ฐ๊ฒฐํ๊ฒ ๋ถ์ํ์ธ์.",
|
| 253 |
"planning": """๋น์ ์ ๋ก๋ด ์์
๊ณํ AI์
๋๋ค.
|
|
@@ -281,64 +344,21 @@ Step_n: xxx""",
|
|
| 281 |
combined_system = f"{search_results}\n\n{system_prompt}"
|
| 282 |
|
| 283 |
# ๋ฉ์์ง ๊ตฌ์ฑ
|
| 284 |
-
messages =
|
| 285 |
-
{
|
| 286 |
-
"role": "system",
|
| 287 |
-
"content": [{"type": "text", "text": combined_system}]
|
| 288 |
-
},
|
| 289 |
-
{
|
| 290 |
-
"role": "user",
|
| 291 |
-
"content": [
|
| 292 |
-
{"type": "image", "url": image},
|
| 293 |
-
{"type": "text", "text": prompt}
|
| 294 |
-
]
|
| 295 |
-
}
|
| 296 |
-
]
|
| 297 |
-
|
| 298 |
-
# ์
๋ ฅ ์ฒ๋ฆฌ
|
| 299 |
-
inputs = processor.apply_chat_template(
|
| 300 |
-
messages,
|
| 301 |
-
add_generation_prompt=True,
|
| 302 |
-
tokenize=True,
|
| 303 |
-
return_dict=True,
|
| 304 |
-
return_tensors="pt",
|
| 305 |
-
).to(device=model.device, dtype=torch.bfloat16)
|
| 306 |
-
|
| 307 |
-
# ์
๋ ฅ ํ ํฐ ์ ์ ํ
|
| 308 |
-
if inputs.input_ids.shape[1] > MAX_INPUT_LENGTH:
|
| 309 |
-
inputs.input_ids = inputs.input_ids[:, -MAX_INPUT_LENGTH:]
|
| 310 |
-
if 'attention_mask' in inputs:
|
| 311 |
-
inputs.attention_mask = inputs.attention_mask[:, -MAX_INPUT_LENGTH:]
|
| 312 |
|
| 313 |
# ์์ฑ
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
pad_token_id=processor.tokenizer.pad_token_id,
|
| 322 |
-
eos_token_id=processor.tokenizer.eos_token_id,
|
| 323 |
-
)
|
| 324 |
-
|
| 325 |
-
# ์
๋ ฅ ํ ํฐ ์ ๊ฑฐํ์ฌ ์ถ๋ ฅ๋ง ์ถ์ถ
|
| 326 |
-
generated_tokens = outputs[0][inputs.input_ids.shape[1]:]
|
| 327 |
-
|
| 328 |
-
# ๋์ฝ๋ฉ
|
| 329 |
-
response = processor.decode(generated_tokens, skip_special_tokens=True).strip()
|
| 330 |
-
|
| 331 |
-
# ํ๋กฌํํธ ์ ๊ฑฐ ๋ฐ ์ ๋ฆฌ
|
| 332 |
-
# ์ด๋ฏธ ์
๋ ฅ ํ ํฐ์ ์ ๊ฑฐํ์ผ๋ฏ๋ก ์ถ๊ฐ ์ ๋ฆฌ๋ง ์ํ
|
| 333 |
-
response = response.strip()
|
| 334 |
|
| 335 |
-
#
|
| 336 |
-
|
| 337 |
-
response = response[6:].strip()
|
| 338 |
-
elif response.startswith("model"):
|
| 339 |
-
response = response[5:].strip()
|
| 340 |
|
| 341 |
-
return
|
| 342 |
|
| 343 |
except Exception as e:
|
| 344 |
logger.error(f"์ด๋ฏธ์ง ๋ถ์ ์ค๋ฅ: {e}")
|
|
@@ -350,16 +370,6 @@ Step_n: xxx""",
|
|
| 350 |
##############################################################################
|
| 351 |
# ๋ฌธ์ ๋ถ์ (์คํธ๋ฆฌ๋ฐ)
|
| 352 |
##############################################################################
|
| 353 |
-
def _model_gen_with_oom_catch(**kwargs):
|
| 354 |
-
"""OOM ์ฒ๋ฆฌ๋ฅผ ์ํ ์์ฑ ํจ์"""
|
| 355 |
-
global model
|
| 356 |
-
try:
|
| 357 |
-
model.generate(**kwargs)
|
| 358 |
-
except torch.cuda.OutOfMemoryError:
|
| 359 |
-
raise RuntimeError("GPU ๋ฉ๋ชจ๋ฆฌ ๋ถ์กฑ. Max Tokens๋ฅผ ์ค์ฌ์ฃผ์ธ์.")
|
| 360 |
-
finally:
|
| 361 |
-
clear_cuda_cache()
|
| 362 |
-
|
| 363 |
@spaces.GPU(duration=120)
|
| 364 |
def analyze_documents_streaming(
|
| 365 |
files: List[str],
|
|
@@ -368,7 +378,7 @@ def analyze_documents_streaming(
|
|
| 368 |
max_new_tokens: int = 2048
|
| 369 |
) -> Iterator[str]:
|
| 370 |
"""๋ฌธ์ ๋ถ์ (์คํธ๋ฆฌ๋ฐ)"""
|
| 371 |
-
global
|
| 372 |
|
| 373 |
if not model_loaded:
|
| 374 |
if not load_model():
|
|
@@ -399,48 +409,32 @@ def analyze_documents_streaming(
|
|
| 399 |
continue
|
| 400 |
doc_contents.append(content)
|
| 401 |
|
|
|
|
|
|
|
|
|
|
| 402 |
# ๋ฉ์์ง ๊ตฌ์ฑ
|
| 403 |
messages = [
|
| 404 |
-
{
|
| 405 |
-
|
| 406 |
-
"content": [{"type": "text", "text": system_content}]
|
| 407 |
-
},
|
| 408 |
-
{
|
| 409 |
-
"role": "user",
|
| 410 |
-
"content": [
|
| 411 |
-
{"type": "text", "text": "\n\n".join(doc_contents) + f"\n\n{prompt}"}
|
| 412 |
-
]
|
| 413 |
-
}
|
| 414 |
]
|
| 415 |
|
| 416 |
-
#
|
| 417 |
-
|
| 418 |
-
messages,
|
| 419 |
-
|
| 420 |
-
tokenize=True,
|
| 421 |
-
return_dict=True,
|
| 422 |
-
return_tensors="pt",
|
| 423 |
-
).to(device=model.device, dtype=torch.bfloat16)
|
| 424 |
-
|
| 425 |
-
# ์คํธ๋ฆฌ๋ฐ ์ค์
|
| 426 |
-
streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
|
| 427 |
-
gen_kwargs = dict(
|
| 428 |
-
inputs,
|
| 429 |
-
streamer=streamer,
|
| 430 |
-
max_new_tokens=max_new_tokens,
|
| 431 |
temperature=0.8,
|
| 432 |
top_p=0.9,
|
|
|
|
| 433 |
)
|
| 434 |
|
| 435 |
-
# ๋ณ๋ ์ค๋ ๋์์ ์์ฑ
|
| 436 |
-
t = Thread(target=_model_gen_with_oom_catch, kwargs=gen_kwargs)
|
| 437 |
-
t.start()
|
| 438 |
-
|
| 439 |
# ์คํธ๋ฆฌ๋ฐ ์ถ๋ ฅ
|
| 440 |
output = ""
|
| 441 |
-
for
|
| 442 |
-
|
| 443 |
-
|
|
|
|
|
|
|
|
|
|
| 444 |
|
| 445 |
except Exception as e:
|
| 446 |
logger.error(f"๋ฌธ์ ๋ถ์ ์ค๋ฅ: {e}")
|
|
@@ -494,17 +488,30 @@ css = """
|
|
| 494 |
background: #e8f5e9;
|
| 495 |
color: #2e7d32;
|
| 496 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 497 |
"""
|
| 498 |
|
| 499 |
-
with gr.Blocks(title="๐ค ๋ก๋ด ์๊ฐ ์์คํ
(Gemma3-4B)", css=css) as demo:
|
| 500 |
gr.HTML("""
|
| 501 |
<div class="robot-header">
|
| 502 |
<h1>๐ค ๋ก๋ด ์๊ฐ ์์คํ
</h1>
|
| 503 |
-
<h3>๐ฎ Gemma3-R1984-4B + ๐ท ์ค์๊ฐ ์น์บ + ๐ ์น ๊ฒ์</h3>
|
| 504 |
-
<p>โก
|
| 505 |
</div>
|
| 506 |
""")
|
| 507 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 508 |
|
| 509 |
with gr.Row():
|
| 510 |
# ์ผ์ชฝ: ์น์บ ๋ฐ ์
๋ ฅ
|
|
@@ -575,15 +582,15 @@ with gr.Blocks(title="๐ค ๋ก๋ด ์๊ฐ ์์คํ
(Gemma3-4B)", css=css) as dem
|
|
| 575 |
|
| 576 |
enable_thinking = gr.Checkbox(
|
| 577 |
label="๐ค ์ถ๋ก ๊ณผ์ ํ์",
|
| 578 |
-
value=False,
|
| 579 |
info="Chain-of-Thought ์ถ๋ก ๊ณผ์ ์ ๋ณด์ฌ์ค๋๋ค"
|
| 580 |
)
|
| 581 |
|
| 582 |
max_tokens = gr.Slider(
|
| 583 |
label="์ต๋ ํ ํฐ ์",
|
| 584 |
minimum=100,
|
| 585 |
-
maximum=
|
| 586 |
-
value=300,
|
| 587 |
step=50
|
| 588 |
)
|
| 589 |
|
|
@@ -600,8 +607,8 @@ with gr.Blocks(title="๐ค ๋ก๋ด ์๊ฐ ์์คํ
(Gemma3-4B)", css=css) as dem
|
|
| 600 |
'<div class="status-box" style="background:#d4edda; color:#155724;">๐ฎ ์์คํ
์ค๋น ์๋ฃ</div>'
|
| 601 |
)
|
| 602 |
|
| 603 |
-
# ๋ฌธ์ ๋ถ์ ํญ
|
| 604 |
-
with gr.Tab("๐ ๋ฌธ์ ๋ถ์", visible=False):
|
| 605 |
with gr.Row():
|
| 606 |
with gr.Column():
|
| 607 |
doc_files = gr.File(
|
|
@@ -661,7 +668,7 @@ with gr.Blocks(title="๐ค ๋ก๋ด ์๊ฐ ์์คํ
(Gemma3-4B)", css=css) as dem
|
|
| 661 |
max_new_tokens=tokens
|
| 662 |
)
|
| 663 |
|
| 664 |
-
# ๊ฒฐ๊ณผ ํฌ๋งทํ
|
| 665 |
timestamp = time.strftime("%H:%M:%S")
|
| 666 |
task_names = {
|
| 667 |
"planning": "์์
๊ณํ",
|
|
@@ -776,7 +783,7 @@ with gr.Blocks(title="๐ค ๋ก๋ด ์๊ฐ ์์คํ
(Gemma3-4B)", css=css) as dem
|
|
| 776 |
)
|
| 777 |
|
| 778 |
# ์๋ ์บก์ฒ ํ์ด๋จธ (10์ด๋ง๋ค)
|
| 779 |
-
timer = gr.Timer(10.0, active=False)
|
| 780 |
|
| 781 |
# ์๋ ์บก์ฒ ํ ๊ธ ์ด๋ฒคํธ
|
| 782 |
def toggle_auto_capture(enabled):
|
|
@@ -809,7 +816,7 @@ with gr.Blocks(title="๐ค ๋ก๋ด ์๊ฐ ์์คํ
(Gemma3-4B)", css=css) as dem
|
|
| 809 |
)
|
| 810 |
|
| 811 |
if __name__ == "__main__":
|
| 812 |
-
print("๐ ๋ก๋ด ์๊ฐ ์์คํ
์์ (Gemma3-R1984-4B)...")
|
| 813 |
demo.launch(
|
| 814 |
server_name="0.0.0.0",
|
| 815 |
server_port=7860,
|
|
|
|
| 15 |
import numpy as np
|
| 16 |
from loguru import logger
|
| 17 |
from PIL import Image
|
|
|
|
| 18 |
import time
|
| 19 |
import warnings
|
| 20 |
from typing import Dict, List, Optional, Union
|
| 21 |
+
import base64
|
| 22 |
+
from io import BytesIO
|
| 23 |
+
|
| 24 |
+
# llama-cpp-python for GGUF
|
| 25 |
+
from llama_cpp import Llama
|
| 26 |
+
from llama_cpp.llama_chat_format import Llava16ChatHandler
|
| 27 |
|
| 28 |
# CSV/TXT ๋ถ์
|
| 29 |
import pandas as pd
|
|
|
|
| 32 |
|
| 33 |
warnings.filterwarnings('ignore')
|
| 34 |
|
| 35 |
+
print("๐ฎ ๋ก๋ด ์๊ฐ ์์คํ
์ด๊ธฐํ (Gemma3-R1984-4B GGUF Q4_K_M)...")
|
| 36 |
|
| 37 |
##############################################################################
|
| 38 |
# ์์ ์ ์
|
|
|
|
| 45 |
##############################################################################
|
| 46 |
# ์ ์ญ ๋ณ์
|
| 47 |
##############################################################################
|
| 48 |
+
llm = None
|
|
|
|
| 49 |
model_loaded = False
|
| 50 |
+
model_name = "Gemma3-R1984-4B-Q4_K_M"
|
| 51 |
|
| 52 |
##############################################################################
|
| 53 |
# ๋ฉ๋ชจ๋ฆฌ ๊ด๋ฆฌ
|
|
|
|
| 89 |
"domain": "google.com",
|
| 90 |
"serp_type": "web",
|
| 91 |
"device": "desktop",
|
| 92 |
+
"lang": "ko",
|
| 93 |
+
"num": "10"
|
| 94 |
}
|
| 95 |
|
| 96 |
headers = {
|
|
|
|
| 194 |
|
| 195 |
return f"**[PDF ํ์ผ: {os.path.basename(pdf_path)}]**\n\n{full_text}"
|
| 196 |
|
| 197 |
+
##############################################################################
|
| 198 |
+
# ์ด๋ฏธ์ง๋ฅผ base64๋ก ๋ณํ
|
| 199 |
+
##############################################################################
|
| 200 |
+
def image_to_base64_data_uri(image: Union[np.ndarray, Image.Image]) -> str:
    """Encode an image as a base64 JPEG data URI.

    Args:
        image: Either a NumPy array (handed to ``Image.fromarray``) or a
            PIL image.

    Returns:
        A string of the form ``data:image/jpeg;base64,<payload>``.

    Raises:
        ValueError: If ``image`` is None.
    """
    if image is None:
        # Fail with a clear message instead of an AttributeError on ``.save``.
        raise ValueError("image must be a numpy array or a PIL image, got None")

    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    # JPEG cannot store alpha or palette modes; the original only converted
    # ndarray input, so an RGBA/P PIL image made ``save`` raise. Normalise
    # every input to RGB before encoding.
    if image.mode != "RGB":
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="JPEG", quality=85)
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/jpeg;base64,{img_str}"
|
| 209 |
+
|
| 210 |
##############################################################################
|
| 211 |
# ๋ชจ๋ธ ๋ก๋
|
| 212 |
##############################################################################
|
| 213 |
@spaces.GPU(duration=120)
|
| 214 |
def load_model():
|
| 215 |
+
global llm, model_loaded
|
| 216 |
|
| 217 |
if model_loaded:
|
| 218 |
logger.info("๋ชจ๋ธ์ด ์ด๋ฏธ ๋ก๋๋์ด ์์ต๋๋ค.")
|
| 219 |
return True
|
| 220 |
|
| 221 |
try:
|
| 222 |
+
logger.info("Gemma3-R1984-4B GGUF Q4_K_M ๋ชจ๋ธ ๋ก๋ฉ ์์...")
|
| 223 |
clear_cuda_cache()
|
| 224 |
|
| 225 |
+
# ๋ชจ๋ธ ๊ฒฝ๋ก ์ค์
|
| 226 |
+
model_path = os.getenv("MODEL_PATH", "VIDraft/Gemma-3-R1984-4B-GGUF/Gemma-3-R1984-4B.Q4_K_M.gguf")
|
| 227 |
+
mmproj_path = os.getenv("MMPROJ_PATH", "VIDraft/Gemma-3-R1984-4B-GGUF/Gemma-3-R1984-4B.mmproj-Q8_0.gguf")
|
| 228 |
+
|
| 229 |
+
# GPU ์ฌ์ฉ ๊ฐ๋ฅ ์ฌ๋ถ ํ์ธ
|
| 230 |
+
n_gpu_layers = -1 if torch.cuda.is_available() else 0
|
| 231 |
|
| 232 |
+
# ์ฑํ
ํธ๋ค๋ฌ ์์ฑ (๋น์ ์ง์)
|
| 233 |
+
chat_handler = Llava16ChatHandler(
|
| 234 |
+
clip_model_path=mmproj_path,
|
| 235 |
+
verbose=False
|
| 236 |
+
)
|
| 237 |
+
|
| 238 |
+
# ๋ชจ๋ธ ๋ก๋
|
| 239 |
+
llm = Llama(
|
| 240 |
+
model_path=model_path,
|
| 241 |
+
chat_handler=chat_handler,
|
| 242 |
+
n_ctx=4096, # ์ปจํ
์คํธ ํฌ๊ธฐ
|
| 243 |
+
n_gpu_layers=n_gpu_layers, # GPU ๋ ์ด์ด
|
| 244 |
+
n_threads=8, # CPU ์ค๋ ๋
|
| 245 |
+
verbose=False,
|
| 246 |
+
seed=42,
|
| 247 |
+
logits_all=True, # ๋น์ ๋ชจ๋ธ์ ํ์
|
| 248 |
)
|
| 249 |
|
| 250 |
model_loaded = True
|
|
|
|
| 255 |
logger.error(f"๋ชจ๋ธ ๋ก๋ฉ ์คํจ: {e}")
|
| 256 |
return False
|
| 257 |
|
| 258 |
+
##############################################################################
|
| 259 |
+
# ์ฑํ
ํ
ํ๋ฆฟ ํฌ๋งทํ
|
| 260 |
+
##############################################################################
|
| 261 |
+
def format_chat_prompt(system_prompt: str, user_prompt: str, image_uri: Optional[str] = None) -> List[Dict]:
    """Build the chat message list: one system message followed by one user message.

    Args:
        system_prompt: Text placed verbatim as the system message content.
        user_prompt: Text placed in the user message as a ``text`` part.
        image_uri: Optional image URI; when truthy it is added as an
            ``image_url`` part ahead of the text part.

    Returns:
        A two-element list of message dicts with ``role`` and ``content`` keys.
        The user message content is always a list of typed parts.
    """
    text_part = {"type": "text", "text": user_prompt}

    if image_uri:
        # The image part goes first, the prompt text second.
        image_part = {"type": "image_url", "image_url": {"url": image_uri}}
        user_parts = [image_part, text_part]
    else:
        user_parts = [text_part]

    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_parts},
    ]
|
| 289 |
+
|
| 290 |
##############################################################################
|
| 291 |
# ์ด๋ฏธ์ง ๋ถ์ (๋ก๋ด ํ์คํฌ ์ค์ฌ)
|
| 292 |
##############################################################################
|
|
|
|
| 296 |
prompt: str,
|
| 297 |
task_type: str = "general",
|
| 298 |
use_web_search: bool = False,
|
| 299 |
+
enable_thinking: bool = False,
|
| 300 |
+
max_new_tokens: int = 300
|
| 301 |
) -> str:
|
| 302 |
"""๋ก๋ด ์์
์ ์ํ ์ด๋ฏธ์ง ๋ถ์"""
|
| 303 |
+
global llm
|
| 304 |
|
| 305 |
if not model_loaded:
|
| 306 |
if not load_model():
|
| 307 |
return "โ ๋ชจ๋ธ ๋ก๋ฉ ์คํจ"
|
| 308 |
|
| 309 |
try:
|
| 310 |
+
# ์ด๋ฏธ์ง๋ฅผ base64๋ก ๋ณํ
|
| 311 |
+
image_uri = image_to_base64_data_uri(image)
|
|
|
|
| 312 |
|
| 313 |
+
# ํ์คํฌ๋ณ ์์คํ
ํ๋กฌํํธ ๊ตฌ์ฑ
|
| 314 |
system_prompts = {
|
| 315 |
"general": "๋น์ ์ ๋ก๋ด ์๊ฐ ์์คํ
์
๋๋ค. ๋จผ์ ์ฅ๋ฉด์ 1-2์ค๋ก ์ค๋ช
ํ๊ณ , ํต์ฌ ๋ด์ฉ์ ๊ฐ๊ฒฐํ๊ฒ ๋ถ์ํ์ธ์.",
|
| 316 |
"planning": """๋น์ ์ ๋ก๋ด ์์
๊ณํ AI์
๋๋ค.
|
|
|
|
| 344 |
combined_system = f"{search_results}\n\n{system_prompt}"
|
| 345 |
|
| 346 |
# ๋ฉ์์ง ๊ตฌ์ฑ
|
| 347 |
+
messages = format_chat_prompt(combined_system, prompt, image_uri)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
|
| 349 |
# ์์ฑ
|
| 350 |
+
response = llm.create_chat_completion(
|
| 351 |
+
messages=messages,
|
| 352 |
+
max_tokens=max_new_tokens,
|
| 353 |
+
temperature=0.7,
|
| 354 |
+
top_p=0.9,
|
| 355 |
+
stream=False
|
| 356 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
|
| 358 |
+
# ์๋ต ์ถ์ถ
|
| 359 |
+
result = response['choices'][0]['message']['content'].strip()
|
|
|
|
|
|
|
|
|
|
| 360 |
|
| 361 |
+
return result
|
| 362 |
|
| 363 |
except Exception as e:
|
| 364 |
logger.error(f"์ด๋ฏธ์ง ๋ถ์ ์ค๋ฅ: {e}")
|
|
|
|
| 370 |
##############################################################################
|
| 371 |
# ๋ฌธ์ ๋ถ์ (์คํธ๋ฆฌ๋ฐ)
|
| 372 |
##############################################################################
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
@spaces.GPU(duration=120)
|
| 374 |
def analyze_documents_streaming(
|
| 375 |
files: List[str],
|
|
|
|
| 378 |
max_new_tokens: int = 2048
|
| 379 |
) -> Iterator[str]:
|
| 380 |
"""๋ฌธ์ ๋ถ์ (์คํธ๋ฆฌ๋ฐ)"""
|
| 381 |
+
global llm
|
| 382 |
|
| 383 |
if not model_loaded:
|
| 384 |
if not load_model():
|
|
|
|
| 409 |
continue
|
| 410 |
doc_contents.append(content)
|
| 411 |
|
| 412 |
+
# ์ ์ฒด ํ๋กฌํํธ ๊ตฌ์ฑ
|
| 413 |
+
full_prompt = "\n\n".join(doc_contents) + f"\n\n{prompt}"
|
| 414 |
+
|
| 415 |
# ๋ฉ์์ง ๊ตฌ์ฑ
|
| 416 |
messages = [
|
| 417 |
+
{"role": "system", "content": system_content},
|
| 418 |
+
{"role": "user", "content": full_prompt}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 419 |
]
|
| 420 |
|
| 421 |
+
# ์คํธ๋ฆฌ๋ฐ ์์ฑ
|
| 422 |
+
stream = llm.create_chat_completion(
|
| 423 |
+
messages=messages,
|
| 424 |
+
max_tokens=max_new_tokens,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
temperature=0.8,
|
| 426 |
top_p=0.9,
|
| 427 |
+
stream=True
|
| 428 |
)
|
| 429 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
# ์คํธ๋ฆฌ๋ฐ ์ถ๋ ฅ
|
| 431 |
output = ""
|
| 432 |
+
for chunk in stream:
|
| 433 |
+
if 'choices' in chunk and len(chunk['choices']) > 0:
|
| 434 |
+
delta = chunk['choices'][0].get('delta', {})
|
| 435 |
+
if 'content' in delta:
|
| 436 |
+
output += delta['content']
|
| 437 |
+
yield output
|
| 438 |
|
| 439 |
except Exception as e:
|
| 440 |
logger.error(f"๋ฌธ์ ๋ถ์ ์ค๋ฅ: {e}")
|
|
|
|
| 488 |
background: #e8f5e9;
|
| 489 |
color: #2e7d32;
|
| 490 |
}
|
| 491 |
+
.model-info {
|
| 492 |
+
background: #fff3cd;
|
| 493 |
+
color: #856404;
|
| 494 |
+
padding: 10px;
|
| 495 |
+
border-radius: 5px;
|
| 496 |
+
margin: 10px 0;
|
| 497 |
+
text-align: center;
|
| 498 |
+
}
|
| 499 |
"""
|
| 500 |
|
| 501 |
+
with gr.Blocks(title="๐ค ๋ก๋ด ์๊ฐ ์์คํ
(Gemma3-4B GGUF)", css=css) as demo:
|
| 502 |
gr.HTML("""
|
| 503 |
<div class="robot-header">
|
| 504 |
<h1>๐ค ๋ก๋ด ์๊ฐ ์์คํ
</h1>
|
| 505 |
+
<h3>๐ฎ Gemma3-R1984-4B GGUF Q4_K_M + ๐ท ์ค์๊ฐ ์น์บ + ๐ ์น ๊ฒ์</h3>
|
| 506 |
+
<p>โก ์์ํ ๋ชจ๋ธ๋ก ๋ ๋น ๋ฅด๊ณ ํจ์จ์ ์ธ ๋ก๋ด ์์
๋ถ์!</p>
|
| 507 |
</div>
|
| 508 |
""")
|
| 509 |
|
| 510 |
+
gr.HTML("""
|
| 511 |
+
<div class="model-info">
|
| 512 |
+
<strong>๋ชจ๋ธ:</strong> Gemma3-R1984-4B Q4_K_M (2.49GB) | <strong>๋ฉ๋ชจ๋ฆฌ ์ฌ์ฉ:</strong> ~3-4GB VRAM
|
| 513 |
+
</div>
|
| 514 |
+
""")
|
| 515 |
|
| 516 |
with gr.Row():
|
| 517 |
# ์ผ์ชฝ: ์น์บ ๋ฐ ์
๋ ฅ
|
|
|
|
| 582 |
|
| 583 |
enable_thinking = gr.Checkbox(
|
| 584 |
label="๐ค ์ถ๋ก ๊ณผ์ ํ์",
|
| 585 |
+
value=False,
|
| 586 |
info="Chain-of-Thought ์ถ๋ก ๊ณผ์ ์ ๋ณด์ฌ์ค๋๋ค"
|
| 587 |
)
|
| 588 |
|
| 589 |
max_tokens = gr.Slider(
|
| 590 |
label="์ต๋ ํ ํฐ ์",
|
| 591 |
minimum=100,
|
| 592 |
+
maximum=2048,
|
| 593 |
+
value=300,
|
| 594 |
step=50
|
| 595 |
)
|
| 596 |
|
|
|
|
| 607 |
'<div class="status-box" style="background:#d4edda; color:#155724;">๐ฎ ์์คํ
์ค๋น ์๋ฃ</div>'
|
| 608 |
)
|
| 609 |
|
| 610 |
+
# ๋ฌธ์ ๋ถ์ ํญ
|
| 611 |
+
with gr.Tab("๐ ๋ฌธ์ ๋ถ์", visible=False):
|
| 612 |
with gr.Row():
|
| 613 |
with gr.Column():
|
| 614 |
doc_files = gr.File(
|
|
|
|
| 668 |
max_new_tokens=tokens
|
| 669 |
)
|
| 670 |
|
| 671 |
+
# ๊ฒฐ๊ณผ ํฌ๋งทํ
|
| 672 |
timestamp = time.strftime("%H:%M:%S")
|
| 673 |
task_names = {
|
| 674 |
"planning": "์์
๊ณํ",
|
|
|
|
| 783 |
)
|
| 784 |
|
| 785 |
# ์๋ ์บก์ฒ ํ์ด๋จธ (10์ด๋ง๋ค)
|
| 786 |
+
timer = gr.Timer(10.0, active=False)
|
| 787 |
|
| 788 |
# ์๋ ์บก์ฒ ํ ๊ธ ์ด๋ฒคํธ
|
| 789 |
def toggle_auto_capture(enabled):
|
|
|
|
| 816 |
)
|
| 817 |
|
| 818 |
if __name__ == "__main__":
|
| 819 |
+
print("๐ ๋ก๋ด ์๊ฐ ์์คํ
์์ (Gemma3-R1984-4B GGUF Q4_K_M)...")
|
| 820 |
demo.launch(
|
| 821 |
server_name="0.0.0.0",
|
| 822 |
server_port=7860,
|