ROBO-R1984

Build error

App Files Files Community

seawolf2357 committed on Mar 16, 2025

Commit

f024201

verified ·

1 Parent(s): c3d078f

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -54

app.py CHANGED Viewed

@@ -16,6 +16,8 @@ from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIter
 # [PDF] PyPDF2 추가
 import PyPDF2
 model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
 processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
@@ -25,7 +27,46 @@ model = Gemma3ForConditionalGeneration.from_pretrained(
 MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
 def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     image_count = 0
     video_count = 0
@@ -36,7 +77,6 @@ def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
             image_count += 1
     return image_count, video_count
 def count_files_in_history(history: list[dict]) -> tuple[int, int]:
     image_count = 0
     video_count = 0
@@ -49,18 +89,22 @@ def count_files_in_history(history: list[dict]) -> tuple[int, int]:
             image_count += 1
     return image_count, video_count
 def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     """
     이미지/비디오 개수와 혼합 여부 등을 검사하는 함수.
-    PDF는 검사 로직에서 제외하여 업로드만 허용.
     """
-    # [PDF] PDF 파일 제외 처리
     pdf_files = [f for f in message["files"] if f.endswith(".pdf")]
-    non_pdf_files = [f for f in message["files"] if not f.endswith(".pdf")]
-    # 기존 로직은 non_pdf_files(= 이미지/비디오)에 대해서만 체크
-    new_image_count, new_video_count = count_files_in_new_message(non_pdf_files)
     history_image_count, history_video_count = count_files_in_history(history)
     image_count = history_image_count + new_image_count
     video_count = history_video_count + new_video_count
@@ -75,25 +119,22 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
         if "<image>" in message["text"]:
             gr.Warning("Using <image> tags with video files is not supported.")
             return False
-        # TODO: Add frame count validation for videos similar to image count limits  # noqa: FIX002, TD002, TD003
     if video_count == 0 and image_count > MAX_NUM_IMAGES:
         gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
         return False
-    # [PDF] PDF 갯수 제한(필요하다면)도 추가 가능
-    # 일단 제한은 두지 않고 바로 True 반환
-    # <image> 태그가 있을 경우, 이미지 개수와 매칭 검사
     if "<image>" in message["text"]:
-        # new_image_count는 pdf 제외된 이미지 수
         if message["text"].count("<image>") != new_image_count:
             gr.Warning("The number of <image> tags in the text does not match the number of images.")
             return False
     return True
 def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
     vidcap = cv2.VideoCapture(video_path)
     fps = vidcap.get(cv2.CAP_PROP_FPS)
@@ -114,7 +155,6 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
     vidcap.release()
     return frames
 def process_video(video_path: str) -> list[dict]:
     content = []
     frames = downsample_video(video_path)
@@ -127,7 +167,9 @@ def process_video(video_path: str) -> list[dict]:
     logger.debug(f"{content=}")
     return content
 def process_interleaved_images(message: dict) -> list[dict]:
     logger.debug(f"{message['files']=}")
     parts = re.split(r"(<image>)", message["text"])
@@ -148,40 +190,25 @@ def process_interleaved_images(message: dict) -> list[dict]:
     logger.debug(f"{content=}")
     return content
-# [PDF] PDF -> Markdown 변환 함수 추가
-def pdf_to_markdown(pdf_path: str) -> str:
-    """
-    Extract the text of a PDF file and return it as simple Markdown.
-
-    The file is opened in binary mode and read with ``PyPDF2.PdfReader``.
-    Each page that yields non-empty text becomes a ``## Page N`` section
-    (pages are numbered from 1); pages with no extractable text are skipped.
-
-    Args:
-        pdf_path: Path of the PDF file to read.
-
-    Returns:
-        The page sections joined with newlines, or an empty string when no
-        page produced any text.
-    """
-    text_chunks = []
-    with open(pdf_path, "rb") as f:
-        reader = PyPDF2.PdfReader(f)
-        for page_num, page in enumerate(reader.pages, start=1):
-            page_text = page.extract_text()
-            # extract_text() may return a falsy value; normalize it to "".
-            page_text = page_text.strip() if page_text else ""
-            if page_text:
-                # Combine a short per-page header and the page body as Markdown.
-                text_chunks.append(f"## Page {page_num}\n\n{page_text}\n")
-    return "\n".join(text_chunks)
 def process_new_user_message(message: dict) -> list[dict]:
-    """
-    새 user message에서 text, 파일(이미지/비디오/PDF)을 처리.
-    """
     if not message["files"]:
         return [{"type": "text", "text": message["text"]}]
-    # [PDF] PDF 파일 목록
     pdf_files = [f for f in message["files"] if f.endswith(".pdf")]
-    # 이미지·비디오 목록
-    other_files = [f for f in message["files"] if not f.endswith(".pdf")]
-    # 일단 사용자의 text를 가장 먼저 넣는다
     content_list = [{"type": "text", "text": message["text"]}]
-    # PDF 변환 후 추가
     for pdf_path in pdf_files:
         pdf_markdown = pdf_to_markdown(pdf_path)
         if pdf_markdown.strip():
@@ -189,12 +216,14 @@ def process_new_user_message(message: dict) -> list[dict]:
         else:
             content_list.append({"type": "text", "text": "(PDF에서 텍스트 추출 실패)"})
-    # 영상이 있는지 확인
     video_files = [f for f in other_files if f.endswith(".mp4")]
     if video_files:
-        # 비디오는 한 개만 처리한다는 전제 (validate_media_constraints에서 이미 검사)
-        # 여러 개일 경우 첫 번째 것만 처리하거나, 경고 처리
         content_list += process_video(video_files[0])
         return content_list
@@ -209,7 +238,9 @@ def process_new_user_message(message: dict) -> list[dict]:
     return content_list
 def process_history(history: list[dict]) -> list[dict]:
     messages = []
     current_user_content: list[dict] = []
@@ -227,7 +258,9 @@ def process_history(history: list[dict]) -> list[dict]:
                 current_user_content.append({"type": "image", "url": content[0]})
     return messages
 @spaces.GPU(duration=120)
 def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
     if not validate_media_constraints(message, history):
@@ -262,7 +295,9 @@ def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tok
         output += delta
         yield output
 examples = [
     [
         {
@@ -385,22 +420,34 @@ examples = [
     ],
 ]
-# [PDF] .pdf 허용
 demo = gr.ChatInterface(
     fn=run,
     type="messages",
     chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
     textbox=gr.MultimodalTextbox(
-        file_types=["image", ".mp4", ".pdf"],  # [PDF] 허용
         file_count="multiple",
         autofocus=True
     ),
     multimodal=True,
     additional_inputs=[
-        gr.Textbox(label="System Prompt", value="ou are a deeply thoughtful AI. Consider problems thoroughly and derive correct solutions through systematic reasoning. Please answer in korean."),
-        gr.Slider(label="Max New Tokens", minimum=100, maximum=8000, step=50, value=2000),
     ],
     stop_btn=False,
     title="Gemma 3 27B IT",

 # [PDF] PyPDF2 추가
 import PyPDF2
+# [CSV] Pandas 추가
+import pandas as pd
 model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
 processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
+###################################################################
+# CSV를 Markdown으로 변환하는 유틸 함수
+###################################################################
+def csv_to_markdown(csv_path: str) -> str:
+    """
+    Convert an entire CSV file to a string and return it in Markdown form.
+
+    The file is loaded with ``pd.read_csv`` and rendered with
+    ``DataFrame.to_string()``; the result is wrapped in a fenced code block
+    under a bold header that shows the file's base name.
+
+    NOTE: the whole table is returned without any size limit, which can be
+    risky for very large CSV files -- truncate if necessary.
+
+    Args:
+        csv_path: Path of the CSV file to read.
+
+    Returns:
+        The Markdown text on success. If any exception is raised while
+        reading or rendering, a "Failed to read CSV (...)" message containing
+        the exception text is returned instead of propagating the error.
+    """
+    try:
+        df = pd.read_csv(csv_path)
+        df_str = df.to_string()
+        # A length limit can be applied here if needed:
+        # if len(df_str) > 10000:
+        #     df_str = df_str[:10000] + "\n...(truncated)..."
+        return f"**[CSV File: {os.path.basename(csv_path)}]**\n\n```\n{df_str}\n```"
+    except Exception as e:
+        # Best effort: report the failure as text rather than raising.
+        return f"Failed to read CSV ({os.path.basename(csv_path)}): {str(e)}"
+###################################################################
+# PDF -> Markdown 변환 함수 (기존)
+###################################################################
+def pdf_to_markdown(pdf_path: str) -> str:
+    """
+    Extract the text of a PDF file and return it as simple Markdown.
+
+    The file is opened in binary mode and read with ``PyPDF2.PdfReader``.
+    Each page that yields non-empty text becomes a ``## Page N`` section
+    (pages are numbered from 1); pages with no extractable text are skipped.
+
+    Args:
+        pdf_path: Path of the PDF file to read.
+
+    Returns:
+        The page sections joined with newlines, or an empty string when no
+        page produced any text.
+    """
+    text_chunks = []
+    with open(pdf_path, "rb") as f:
+        reader = PyPDF2.PdfReader(f)
+        for page_num, page in enumerate(reader.pages, start=1):
+            page_text = page.extract_text()
+            # extract_text() may return a falsy value; normalize it to "".
+            page_text = page_text.strip() if page_text else ""
+            if page_text:
+                # Combine a short per-page header and the page body as Markdown.
+                text_chunks.append(f"## Page {page_num}\n\n{page_text}\n")
+    return "\n".join(text_chunks)
+###################################################################
+# 이미지/비디오 개수 카운트 (기존)
+###################################################################
 def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     image_count = 0
     video_count = 0
             image_count += 1
     return image_count, video_count
 def count_files_in_history(history: list[dict]) -> tuple[int, int]:
     image_count = 0
     video_count = 0
             image_count += 1
     return image_count, video_count
+###################################################################
+# 미디어(이미지/비디오) 제한 검사 + PDF/CSV 예외 (기존/수정)
+###################################################################
 def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     """
     이미지/비디오 개수와 혼합 여부 등을 검사하는 함수.
+    PDF, CSV 등은 검사 로직에서 제외하여 업로드만 허용.
     """
+    # pdf, csv 파일 제외
     pdf_files = [f for f in message["files"] if f.endswith(".pdf")]
+    csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
+    non_pdf_csv_files = [f for f in message["files"]
+                         if not f.endswith(".pdf") and not f.lower().endswith(".csv")]
+    # 기존 로직은 이미지/비디오에 대해서만 체크
+    new_image_count, new_video_count = count_files_in_new_message(non_pdf_csv_files)
     history_image_count, history_video_count = count_files_in_history(history)
     image_count = history_image_count + new_image_count
     video_count = history_video_count + new_video_count
         if "<image>" in message["text"]:
             gr.Warning("Using <image> tags with video files is not supported.")
             return False
     if video_count == 0 and image_count > MAX_NUM_IMAGES:
         gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
         return False
+    # <image> 태그가 있을 경우, 이미지 수와 태그 수 일치
     if "<image>" in message["text"]:
         if message["text"].count("<image>") != new_image_count:
             gr.Warning("The number of <image> tags in the text does not match the number of images.")
             return False
     return True
+###################################################################
+# 동영상 처리 (기존)
+###################################################################
 def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
     vidcap = cv2.VideoCapture(video_path)
     fps = vidcap.get(cv2.CAP_PROP_FPS)
     vidcap.release()
     return frames
 def process_video(video_path: str) -> list[dict]:
     content = []
     frames = downsample_video(video_path)
     logger.debug(f"{content=}")
     return content
+###################################################################
+# <image> 태그 interleaved 이미지 처리 (기존)
+###################################################################
 def process_interleaved_images(message: dict) -> list[dict]:
     logger.debug(f"{message['files']=}")
     parts = re.split(r"(<image>)", message["text"])
     logger.debug(f"{content=}")
     return content
+###################################################################
+# 새 user message 처리 (PDF + CSV + 이미지/비디오)
+###################################################################
 def process_new_user_message(message: dict) -> list[dict]:
     if not message["files"]:
         return [{"type": "text", "text": message["text"]}]
+    # PDF 파일 목록
     pdf_files = [f for f in message["files"] if f.endswith(".pdf")]
+    # CSV 파일 목록
+    csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
+    # 이미지/비디오 (기존)
+    other_files = [f for f in message["files"]
+                   if not f.endswith(".pdf") and not f.lower().endswith(".csv")]
+    # 일단 사용자의 text를 먼저 넣는다
     content_list = [{"type": "text", "text": message["text"]}]
+    # [PDF] 변환 후 추가
     for pdf_path in pdf_files:
         pdf_markdown = pdf_to_markdown(pdf_path)
         if pdf_markdown.strip():
         else:
             content_list.append({"type": "text", "text": "(PDF에서 텍스트 추출 실패)"})
+    # [CSV] 변환 후 추가
+    for cfile in csv_files:
+        csv_md = csv_to_markdown(cfile)
+        content_list.append({"type": "text", "text": csv_md})
+    # 영상 처리
     video_files = [f for f in other_files if f.endswith(".mp4")]
     if video_files:
         content_list += process_video(video_files[0])
         return content_list
     return content_list
+###################################################################
+# 히스토리 -> LLM용 메시지 변환 (기존)
+###################################################################
 def process_history(history: list[dict]) -> list[dict]:
     messages = []
     current_user_content: list[dict] = []
                 current_user_content.append({"type": "image", "url": content[0]})
     return messages
+###################################################################
+# 메인 추론 함수 (기존)
+###################################################################
 @spaces.GPU(duration=120)
 def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
     if not validate_media_constraints(message, history):
         output += delta
         yield output
+###################################################################
+# 예시들 (기존 그대로)
+###################################################################
 examples = [
     [
         {
     ],
 ]
+###################################################################
+# PDF + CSV를 허용하는 Gradio ChatInterface
+###################################################################
 demo = gr.ChatInterface(
     fn=run,
     type="messages",
     chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
     textbox=gr.MultimodalTextbox(
+        file_types=["image", ".mp4", ".pdf", ".csv"],  # pdf & csv 허용
         file_count="multiple",
         autofocus=True
     ),
     multimodal=True,
     additional_inputs=[
+        gr.Textbox(
+            label="System Prompt",
+            value=(
+                "You are a deeply thoughtful AI. Consider problems thoroughly and derive correct "
+                "solutions through systematic reasoning. Please answer in korean."
+            )
+        ),
+        gr.Slider(
+            label="Max New Tokens",
+            minimum=100,
+            maximum=8000,
+            step=50,
+            value=2000
+        ),
     ],
     stop_btn=False,
     title="Gemma 3 27B IT",