Spaces:

petchsko
/

Translator_app

Sleeping

App Files Files Community

Petch DS committed on Feb 5, 2025

Commit

e193988

1 Parent(s): 0a3b2e2

addWord_Inprogress1

Browse files

Files changed (4) hide show

.DS_Store +0 -0
translated_output.xlsx +0 -0
translator_app.ipynb +142 -111
translator_app.py +96 -20

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

translated_output.xlsx DELETED Viewed

Binary file (6.33 kB)

translator_app.ipynb CHANGED Viewed

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 33,
    "metadata": {
     "id": "dKoye1NqPPWX"
    },
@@ -20,7 +20,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -30,77 +30,18 @@
     "from langchain_core.prompts import PromptTemplate\n",
     "from langchain_core.runnables import RunnableLambda\n",
     "import gradio as gr\n",
-    "import pandas as pd\n",
-    "from transformers import T5Tokenizer, T5ForConditionalGeneration\n",
-    "import torch\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# from docx import Document\n",
-    "\n",
-    "# # โหลดไฟล์ Word\n",
-    "# doc = Document('test_file.docx')\n",
-    "\n",
-    "# # อ่านทุก paragraph และแสดงเนื้อหา\n",
-    "# for para in doc.paragraphs:\n",
-    "#     print(para.text)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# # อ่านทุกตารางในเอกสาร\n",
-    "# for table in doc.tables:\n",
-    "#     for row in table.rows:\n",
-    "#         for cell in row.cells:\n",
-    "#             print(cell.text)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# for element in doc.element.body:\n",
-    "#     if element.tag.endswith('tbl'):\n",
-    "#         # ถ้าเป็นตาราง\n",
-    "#         print('Table found')\n",
-    "#     elif element.tag.endswith('p'):\n",
-    "#         # ถ้าเป็นพารากราฟ\n",
-    "#         print('Paragraph found')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# # นับรูปภาพ\n",
-    "# images = doc.inline_shapes\n",
-    "# print(\"Found\", len(images), \"images\")\n",
-    "\n",
-    "# # ตัวอย่างวิธีดึงข้อมูลพื้นฐานของรูปภาพแต่ละรูป\n",
-    "# for image in images:\n",
-    "#     print(\"Image size:\", image.width.pt, \"x\", image.height.pt) "
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -110,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -121,7 +62,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Process for Each Model"
    ]
   },
   {
@@ -133,10 +74,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
    "metadata": {},
    "outputs": [],
    "source": [
     "\n",
     "def chat_gpt_4o_mini(api_key = None):\n",
     "    model = ChatOpenAI(model_name=\"gpt-4o-mini\", api_key=api_key)\n",
@@ -158,23 +105,22 @@
     "\n",
     "    chain = prompt | model | output_parser | RunnableLambda(get_class)  \n",
     "\n",
-    "    return chain\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "\n",
-    "\n",
-    "def using_model(chosen_model, api_key=None):\n",
-    "    if chosen_model == 'ChatGPT (4o-mini)':\n",
-    "        return chat_gpt_4o_mini(api_key=api_key)\n",
-    "    else:\n",
-    "        raise ValueError(\"Unsupported model selected\")\n"
-   ]
   },
   {
    "cell_type": "markdown",
@@ -185,7 +131,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -199,12 +145,18 @@
     "\n",
     "def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lang, where_to_place, keep_original, chosen_model, api_key = None, progress=gr.Progress()):\n",
     "    if where_to_place is None:\n",
-    "        where_to_place = 'append_all (ต่อ column สุดท้าย)'\n",
     "\n",
     "    model = using_model(chosen_model = chosen_model, api_key = api_key)\n",
     "\n",
-    "    df = pd.read_excel(file.name, sheet_name=sheet_name, header=0)\n",
-    "    # original columns = col_name + remain_col\n",
     "    original_col = df.columns\n",
     "    total_columns = len(df.columns)\n",
     "    current_step = 0\n",
@@ -212,18 +164,16 @@
     "    progress(0, desc=\"Starting translation process...\")\n",
     "\n",
     "    # Automatically detect string columns if col_name is None\n",
-    "    # col_name is column we want to translate\n",
     "    if col_name is None:\n",
     "        col_name = [col for col in df.columns if df[col].dtype == 'object']\n",
     "\n",
     "    # Determine columns that are not selected for translation\n",
-    "    # remain_col is column we do not want to translate\n",
     "    remain_col = [col for col in df.columns if col not in col_name]\n",
     "\n",
     "    # Dictionary to store unique values and their translations\n",
     "    translation_map = {}\n",
     "    trans_col_name = []\n",
-    "    print(col_name)\n",
     "\n",
     "    # Process the selected columns for translation\n",
     "    for idx, col in enumerate(col_name):\n",
@@ -253,14 +203,14 @@
     "            print(f\"Error in column {col}: {e}\")\n",
     "            continue\n",
     "\n",
-    "    # # Process remaining columns\n",
     "    # for column in remain_col:\n",
     "    #     current_step += 1\n",
-    "    #     progress(current_step / total_columns, desc=f\"Translating column name: {column} ({current_step}/{len(remain_col)})...\")\n",
     "\n",
     "    #     try:\n",
-    "    #         # We do not translate remain_col which remaining col\n",
-    "    #         # remain_col = chain.batch([{\"sentence\": column, \"source_lang\": source_lang, \"target_lang\": target_lang}])\n",
     "    #         name_col = column + '_translated'  # Assuming the translation returns a list of translations\n",
     "    #         df.loc[:, name_col] = df.loc[:, column]\n",
     "\n",
@@ -268,19 +218,14 @@
     "    #         print(f\"Error in column {column}: {e}\")\n",
     "    #         continue\n",
     "\n",
-    "    \n",
-    "    output_file = f\"{file.name.split('.')[0]}_translated.xlsx\"\n",
     "    if not os.path.exists(output_file):\n",
     "        pd.DataFrame().to_excel(output_file, index=False)\n",
     "\n",
     "    if keep_original == 'keep original':\n",
-    "        # have the all columns\n",
     "        output_col = original_col\n",
     "    else:\n",
-    "        # only translated column\n",
     "        output_col = col_name\n",
     "\n",
-    "        \n",
     "    try:\n",
     "        if where_to_place == 'append_all (ต่อ column สุดท้าย)':\n",
     "            final_cols = list(output_col) + [col for col in trans_col_name]\n",
@@ -328,6 +273,59 @@
     "\n"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -337,7 +335,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
    "metadata": {
     "id": "x8Njoc4fROSp"
    },
@@ -346,7 +344,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Running on local URL:  http://127.0.0.1:7860\n",
       "\n",
       "To create a public link, set `share=True` in `launch()`.\n"
      ]
@@ -354,7 +352,7 @@
     {
      "data": {
       "text/html": [
-       "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -376,7 +374,6 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "['control type']\n",
       "Keyboard interruption in main thread... closing server.\n"
      ]
     },
@@ -384,7 +381,7 @@
      "data": {
       "text/plain": []
      },
-     "execution_count": 36,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -413,6 +410,24 @@
     "                                                  'translated_column']\n",
     "                                                  , interactive=True\n",
     "                                     )\n",
     "   \n",
     "    def get_sheet_names(file):\n",
     "      xls = pd.ExcelFile(file.name)\n",
@@ -423,20 +438,25 @@
     "        return gr.update(choices=sheets)\n",
     "\n",
     "    def update_columns(file, sheet_name):\n",
-    "        columns = get_column_names(file, sheet_name)\n",
-    "        return gr.update(choices=columns)\n",
     "\n",
     "    def get_column_names(file, sheet_name):\n",
     "        dd = pd.read_excel(file.name, sheet_name=sheet_name)\n",
     "        return list(dd.columns)\n",
     "    \n",
     "\n",
-    "    excel_file.change(fn=update_sheets, inputs=excel_file, outputs=sheet_name)\n",
     "    sheet_name.change(fn=update_columns, inputs=[excel_file, sheet_name], outputs=column_name)\n",
     "\n",
     "    model_choosing = gr.Dropdown(multiselect = False , \n",
     "                                 label = \"Choosing Model you want\", \n",
-    "                                 choices = ['ChatGPT (4o-mini)', 'DeepSeek (developing...)','another (In Progress)']\n",
     "                                 , interactive=True\n",
     "                                 )\n",
     "\n",
@@ -448,14 +468,25 @@
     "    def translate_excel(\n",
     "        file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key\n",
     "    ):\n",
-    "        if model == \"ChatGPT (4o-mini)\":\n",
-    "            # Call ChatGPT-based translation\n",
-    "            return chat_gpt_translate_excel(\n",
-    "                file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key\n",
-    "            )\n",
     "        else:\n",
-    "            # Handle other models (currently in progress)\n",
-    "            raise gr.Error(\"Translation with the selected model is not yet implemented.\")\n",
     "\n",
     "    # Register button click\n",
     "    translate_button.click(\n",
@@ -473,7 +504,7 @@
     "        ],\n",
     "        outputs=output_file,\n",
     "    )\n",
-    "iface.launch(debug=True)\n",
     "\n"
    ]
   },

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 34,
    "metadata": {
     "id": "dKoye1NqPPWX"
    },
   },
   {
    "cell_type": "code",
+   "execution_count": 35,
    "metadata": {},
    "outputs": [],
    "source": [
     "from langchain_core.prompts import PromptTemplate\n",
     "from langchain_core.runnables import RunnableLambda\n",
     "import gradio as gr\n",
+    "import pandas as pd"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 36,
    "metadata": {},
    "outputs": [],
    "source": [
+    "from docx import Document"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
   },
   {
    "cell_type": "code",
+   "execution_count": 37,
    "metadata": {},
    "outputs": [],
    "source": [
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "# Process"
    ]
   },
   {
   },
   {
    "cell_type": "code",
+   "execution_count": 38,
    "metadata": {},
    "outputs": [],
    "source": [
+    "def using_model(chosen_model, api_key):\n",
+    "    if chosen_model == 'ChatGPT (4o-mini)':\n",
+    "        model = chat_gpt_4o_mini(api_key = api_key)\n",
+    "    else:\n",
+    "        pass\n",
+    "    return model\n",
     "\n",
     "def chat_gpt_4o_mini(api_key = None):\n",
     "    model = ChatOpenAI(model_name=\"gpt-4o-mini\", api_key=api_key)\n",
     "\n",
     "    chain = prompt | model | output_parser | RunnableLambda(get_class)  \n",
     "\n",
+    "    return chain"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   },
   {
    "cell_type": "markdown",
   },
   {
    "cell_type": "code",
+   "execution_count": 39,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/",
     "\n",
     "def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lang, where_to_place, keep_original, chosen_model, api_key = None, progress=gr.Progress()):\n",
     "    if where_to_place is None:\n",
+    "        where_to_place = 'append_all'\n",
     "\n",
     "    model = using_model(chosen_model = chosen_model, api_key = api_key)\n",
     "\n",
+    "    if isinstance(file, pd.DataFrame):\n",
+    "        df = file.copy()\n",
+    "        output_file = f\"{file.name.unique()[0].split('.')[0]}_translated.xlsx\"\n",
+    "        df = df.drop(columns=['name'])\n",
+    "    else:\n",
+    "        df = pd.read_excel(file.name, sheet_name=sheet_name, header=0)\n",
+    "        output_file = f\"{file.name.split('.')[0]}_translated.xlsx\"\n",
+    "\n",
     "    original_col = df.columns\n",
     "    total_columns = len(df.columns)\n",
     "    current_step = 0\n",
     "    progress(0, desc=\"Starting translation process...\")\n",
     "\n",
     "    # Automatically detect string columns if col_name is None\n",
     "    if col_name is None:\n",
     "        col_name = [col for col in df.columns if df[col].dtype == 'object']\n",
     "\n",
     "    # Determine columns that are not selected for translation\n",
     "    remain_col = [col for col in df.columns if col not in col_name]\n",
     "\n",
     "    # Dictionary to store unique values and their translations\n",
     "    translation_map = {}\n",
     "    trans_col_name = []\n",
+    "\n",
     "\n",
     "    # Process the selected columns for translation\n",
     "    for idx, col in enumerate(col_name):\n",
     "            print(f\"Error in column {col}: {e}\")\n",
     "            continue\n",
     "\n",
+    "    # Process remaining columns\n",
     "    # for column in remain_col:\n",
     "    #     current_step += 1\n",
+    "    #     progress(current_step / total_columns, desc=f\"Translating column name: {column} ({current_step}/{total_columns})...\")\n",
     "\n",
     "    #     try:\n",
+    "    #         # We do not translate all_col which remaining col\n",
+    "    #         # all_col_translation = chain.batch([{\"sentence\": column, \"source_lang\": source_lang, \"target_lang\": target_lang}])\n",
     "    #         name_col = column + '_translated'  # Assuming the translation returns a list of translations\n",
     "    #         df.loc[:, name_col] = df.loc[:, column]\n",
     "\n",
     "    #         print(f\"Error in column {column}: {e}\")\n",
     "    #         continue\n",
     "\n",
     "    if not os.path.exists(output_file):\n",
     "        pd.DataFrame().to_excel(output_file, index=False)\n",
     "\n",
     "    if keep_original == 'keep original':\n",
     "        output_col = original_col\n",
     "    else:\n",
     "        output_col = col_name\n",
     "\n",
     "    try:\n",
     "        if where_to_place == 'append_all (ต่อ column สุดท้าย)':\n",
     "            final_cols = list(output_col) + [col for col in trans_col_name]\n",
     "\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def extract_word_content_to_excel(file_path):\n",
+    "    \"\"\" ดึงเนื้อหา + รูปภาพจากไฟล์ Word และบันทึกเป็น Excel \"\"\"\n",
+    "    doc = Document(file_path)\n",
+    "    \n",
+    "    data = []\n",
+    "    paragraph_count = 0\n",
+    "\n",
+    "    for element in doc.element.body:\n",
+    "        if element.tag.endswith(\"p\"):  # Paragraph\n",
+    "            paragraph_text = element.text.strip()\n",
+    "            paragraph_count += 1\n",
+    "            data.append([paragraph_count, paragraph_text])  # บันทึกพารากราฟ\n",
+    "\n",
+    "        elif element.tag.endswith(\"tbl\"):  # Table (ถ้ามี)\n",
+    "            paragraph_count += 1\n",
+    "            data.append([paragraph_count, \"[Table]\"])\n",
+    "\n",
+    "        elif element.tag.endswith(\"drawing\"):  # Image (รูปภาพ)\n",
+    "            paragraph_count += 1\n",
+    "            data.append([paragraph_count, \"[Image]\"])\n",
+    "\n",
+    "    # สร้าง DataFrame\n",
+    "    df = pd.DataFrame(data, columns=[\"paragraph\", \"original\"])\n",
+    "    df['name'] = file_path.split('/')[-1]\n",
+    "    return df\n",
+    "\n",
+    "def chat_gpt_translate_word(file, sheet_name, col_name, source_lang, target_lang, where_to_place, keep_original, chosen_model, api_key = None, progress=gr.Progress()):\n",
+    "    word_to_excel_file = extract_word_content_to_excel(file)\n",
+    "    return chat_gpt_translate_excel(word_to_excel_file, \n",
+    "                             sheet_name=\"Sheet1\", \n",
+    "                             col_name = ['original'], \n",
+    "                             source_lang = source_lang, \n",
+    "                             target_lang = target_lang, \n",
+    "                             where_to_place=\"append_all (ต่อ column สุดท้าย)\", \n",
+    "                             keep_original=\"keep original\", \n",
+    "                             chosen_model = chosen_model, \n",
+    "                             api_key = api_key\n",
+    "                             )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
   },
   {
    "cell_type": "code",
+   "execution_count": 41,
    "metadata": {
     "id": "x8Njoc4fROSp"
    },
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "Running on local URL:  http://127.0.0.1:7861\n",
       "\n",
       "To create a public link, set `share=True` in `launch()`.\n"
      ]
     {
      "data": {
       "text/html": [
+       "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "Keyboard interruption in main thread... closing server.\n"
      ]
     },
      "data": {
       "text/plain": []
      },
+     "execution_count": 41,
      "metadata": {},
      "output_type": "execute_result"
     }
     "                                                  'translated_column']\n",
     "                                                  , interactive=True\n",
     "                                     )\n",
+    "\n",
+    "    def check_file_type(file):\n",
+    "        \"\"\" ตรวจสอบว่าไฟล์ที่อัปโหลดเป็น Word หรือ Excel \"\"\"\n",
+    "        file_extension = os.path.splitext(file.name)[-1].lower()\n",
+    "\n",
+    "        if file_extension in [\".docx\", \".doc\"]:\n",
+    "            return gr.update(choices=['all paragraphs only', 'specified paragraph or page (Developing ...)'])\n",
+    "        elif file_extension in [\".xlsx\", \".xls\"]:\n",
+    "            return update_sheets(file)\n",
+    "        else:\n",
+    "            return \"Unknown\"\n",
+    "        \n",
+    "    def check_uploaded_file(file):\n",
+    "        \"\"\" ฟังก์ชันรับไฟล์ที่อัปโหลด แล้วตรวจสอบประเภท \"\"\"\n",
+    "        if file is None:\n",
+    "            return \"No file uploaded\"\n",
+    "        return check_file_type(file)\n",
+    "\n",
     "   \n",
     "    def get_sheet_names(file):\n",
     "      xls = pd.ExcelFile(file.name)\n",
     "        return gr.update(choices=sheets)\n",
     "\n",
     "    def update_columns(file, sheet_name):\n",
+    "        if os.path.splitext(file.name)[-1].lower() in [\".docx\", \".doc\"]:\n",
+    "            return gr.update(choices=['original'])\n",
+    "        elif os.path.splitext(file.name)[-1].lower() in [\".xlsx\", \".xls\"]:\n",
+    "            columns = get_column_names(file, sheet_name)\n",
+    "            return gr.update(choices=columns)\n",
+    "        else:\n",
+    "            return \"error\"\n",
     "\n",
     "    def get_column_names(file, sheet_name):\n",
     "        dd = pd.read_excel(file.name, sheet_name=sheet_name)\n",
     "        return list(dd.columns)\n",
     "    \n",
     "\n",
+    "    excel_file.change(fn=check_uploaded_file, inputs=excel_file, outputs=sheet_name)\n",
     "    sheet_name.change(fn=update_columns, inputs=[excel_file, sheet_name], outputs=column_name)\n",
     "\n",
     "    model_choosing = gr.Dropdown(multiselect = False , \n",
     "                                 label = \"Choosing Model you want\", \n",
+    "                                 choices = ['ChatGPT (4o-mini)', 'Deepseek (developing ...)', 'another (In Progress)']\n",
     "                                 , interactive=True\n",
     "                                 )\n",
     "\n",
     "    def translate_excel(\n",
     "        file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key\n",
     "    ):\n",
+    "        if os.path.splitext(file.name)[-1].lower() in [\".xlsx\", \".xls\"]:\n",
+    "            if model == \"ChatGPT (4o-mini)\":\n",
+    "                # Call ChatGPT-based translation\n",
+    "                return chat_gpt_translate_excel(\n",
+    "                    file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key\n",
+    "                )\n",
+    "            else:\n",
+    "                # Handle other models (currently in progress)\n",
+    "                raise gr.Error(\"Translation with the selected model is not yet implemented.\")\n",
+    "        elif os.path.splitext(file.name)[-1].lower() in [\".docx\", \".doc\"]:\n",
+    "            if model == \"ChatGPT (4o-mini)\":\n",
+    "                # Call ChatGPT-based translation\n",
+    "                return chat_gpt_translate_word(file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key)\n",
+    "            else:\n",
+    "                # Handle other models (currently in progress)\n",
+    "                raise gr.Error(\"Translation with the selected model is not yet implemented.\")\n",
+    "            \n",
     "        else:\n",
+    "            print('No Type of Input Supported')\n",
     "\n",
     "    # Register button click\n",
     "    translate_button.click(\n",
     "        ],\n",
     "        outputs=output_file,\n",
     "    )\n",
+    "iface.launch(debug=True, server_port= 7861)\n",
     "\n"
    ]
   },

translator_app.py CHANGED Viewed

@@ -6,7 +6,7 @@ from langchain_core.prompts import PromptTemplate
 from langchain_core.runnables import RunnableLambda
 import gradio as gr
 import pandas as pd
 def using_model(chosen_model, api_key):
     if chosen_model == 'ChatGPT (4o-mini)':
@@ -44,7 +44,14 @@ def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lan
     model = using_model(chosen_model = chosen_model, api_key = api_key)
-    df = pd.read_excel(file.name, sheet_name=sheet_name, header=0)
     original_col = df.columns
     total_columns = len(df.columns)
     current_step = 0
@@ -105,8 +112,6 @@ def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lan
     #         print(f"Error in column {column}: {e}")
     #         continue
-    output_file = f"{file.name}_translated.xlsx"
     if not os.path.exists(output_file):
         pd.DataFrame().to_excel(output_file, index=False)
@@ -159,8 +164,47 @@ def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lan
     progress(1.0, desc="Completed all tasks!")
     return output_file
 if __name__ == "__main__" :
     with gr.Blocks() as iface:
         gr.Markdown("## Excel Translation Interface")
@@ -184,7 +228,23 @@ if __name__ == "__main__" :
                                                     'translated_column']
                                                     , interactive=True
                                         )
         def get_sheet_names(file):
             xls = pd.ExcelFile(file.name)
             return xls.sheet_names
@@ -194,15 +254,20 @@ if __name__ == "__main__" :
             return gr.update(choices=sheets)
         def update_columns(file, sheet_name):
-            columns = get_column_names(file, sheet_name)
-            return gr.update(choices=columns)
         def get_column_names(file, sheet_name):
             dd = pd.read_excel(file.name, sheet_name=sheet_name)
             return list(dd.columns)
-        excel_file.change(fn=update_sheets, inputs=excel_file, outputs=sheet_name)
         sheet_name.change(fn=update_columns, inputs=[excel_file, sheet_name], outputs=column_name)
         model_choosing = gr.Dropdown(multiselect = False ,
@@ -217,17 +282,24 @@ if __name__ == "__main__" :
         # Unified translation function
         def translate_excel(
-            file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key
-        ):
-            if model == "ChatGPT (4o-mini)":
-                # Call ChatGPT-based translation
-                return chat_gpt_translate_excel(
-                    file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key
-                )
-            else:
-                # Handle other models (currently in progress)
-                raise gr.Error("Translation with the selected model is not yet implemented.")
         # Register button click
         translate_button.click(
             fn=translate_excel,
@@ -244,6 +316,10 @@ if __name__ == "__main__" :
             ],
             outputs=output_file,
         )
-    iface.launch(debug=True, share=True, server_port= 7860,
                  server_name="0.0.0.0"
                  )

 from langchain_core.runnables import RunnableLambda
 import gradio as gr
 import pandas as pd
+from docx import Document
 def using_model(chosen_model, api_key):
     if chosen_model == 'ChatGPT (4o-mini)':
     model = using_model(chosen_model = chosen_model, api_key = api_key)
+    if isinstance(file, pd.DataFrame):
+        df = file.copy()
+        output_file = f"{file.name.unique()[0].split('.')[0]}_translated.xlsx"
+        df = df.drop(columns=['name'])
+    else:
+        df = pd.read_excel(file.name, sheet_name=sheet_name, header=0)
+        output_file = f"{file.name.split('.')[0]}_translated.xlsx"
     original_col = df.columns
     total_columns = len(df.columns)
     current_step = 0
     #         print(f"Error in column {column}: {e}")
     #         continue
     if not os.path.exists(output_file):
         pd.DataFrame().to_excel(output_file, index=False)
     progress(1.0, desc="Completed all tasks!")
     return output_file
+def _paragraph_text(element):
+    """Return the stripped text of one paragraph XML element."""
+    direct = element.text
+    if direct and direct.strip():
+        return direct.strip()
+    # ``element.text`` may be None or empty (which python-docx versions expose
+    # full paragraph text here is unverified), and calling ``.strip()`` on
+    # None crashed the original. Fall back to joining the WordprocessingML
+    # text nodes (``w:t``) found anywhere below the paragraph.
+    w_t = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}t"
+    return "".join(node.text or "" for node in element.iter(w_t)).strip()
+def extract_word_content_to_excel(file_path):
+    """Flatten the top-level body of a Word document into a DataFrame.
+
+    Walks the direct children of the document body in order and emits one row
+    per recognised element: paragraphs contribute their stripped text, tables
+    the placeholder "[Table]" and drawing elements the placeholder "[Image]".
+    Any other element is skipped without consuming an index.
+
+    Args:
+        file_path: Path of the Word file (str or os.PathLike), or an upload
+            object that exposes the path as ``.name``.
+
+    Returns:
+        DataFrame with columns ``paragraph`` (1-based running index),
+        ``original`` (text or placeholder) and ``name`` (base name of the
+        file, repeated on every row).
+    """
+    import os  # local import so the block does not rely on the file header
+    # The caller hands over the uploaded-file object, not a plain string, so
+    # resolve the real path before opening the document or deriving the name.
+    if isinstance(file_path, (str, os.PathLike)):
+        path = os.fspath(file_path)
+    else:
+        path = file_path.name
+    doc = Document(path)
+    rows = []
+    for element in doc.element.body:
+        tag = element.tag if isinstance(element.tag, str) else ""
+        if tag.endswith("p"):  # paragraph
+            content = _paragraph_text(element)
+        elif tag.endswith("tbl"):  # table: placeholder only, cells are not read
+            content = "[Table]"
+        elif tag.endswith("drawing"):  # image placeholder
+            # NOTE(review): drawings normally sit inside paragraph runs, so this
+            # branch may never match a direct body child - confirm.
+            content = "[Image]"
+        else:
+            continue
+        rows.append([len(rows) + 1, content])
+    df = pd.DataFrame(rows, columns=["paragraph", "original"])
+    # basename() also copes with Windows separators, unlike split('/').
+    df['name'] = os.path.basename(path)
+    return df
+def chat_gpt_translate_word(file, sheet_name, col_name, source_lang, target_lang, where_to_place, keep_original, chosen_model, api_key = None, progress=gr.Progress()):
+    """Translate a Word document by routing it through the Excel pipeline.
+
+    The document is first flattened into a DataFrame, then translated with
+    fixed settings: only the 'original' column is translated, the result is
+    appended after the last column and the original columns are kept. The
+    caller's ``sheet_name``, ``col_name``, ``where_to_place``,
+    ``keep_original`` and ``progress`` arguments are accepted for signature
+    parity but not forwarded.
+
+    Returns:
+        Whatever ``chat_gpt_translate_excel`` returns for the extracted table.
+    """
+    extracted = extract_word_content_to_excel(file)
+    # Options that are hard-wired for Word input.
+    fixed_options = {
+        "sheet_name": "Sheet1",
+        "col_name": ['original'],
+        "where_to_place": "append_all (ต่อ column สุดท้าย)",
+        "keep_original": "keep original",
+    }
+    return chat_gpt_translate_excel(
+        extracted,
+        source_lang=source_lang,
+        target_lang=target_lang,
+        chosen_model=chosen_model,
+        api_key=api_key,
+        **fixed_options,
+    )
 if __name__ == "__main__" :
     with gr.Blocks() as iface:
         gr.Markdown("## Excel Translation Interface")
                                                     'translated_column']
                                                     , interactive=True
                                         )
+        def check_file_type(file):
+            """Build the sheet-selector update that matches the upload's extension.
+
+            Word files get the two fixed paragraph-mode choices, Excel files get
+            their sheet names via ``update_sheets``; anything else yields the
+            string "Unknown".
+            """
+            suffix = os.path.splitext(file.name)[-1].lower()
+            if suffix in (".docx", ".doc"):
+                word_modes = ['all paragraphs only', 'specified paragraph or page (Developing ...)']
+                return gr.update(choices=word_modes)
+            if suffix in (".xlsx", ".xls"):
+                return update_sheets(file)
+            # NOTE(review): this bare string is sent to the sheet dropdown as its
+            # value - confirm that is intended rather than an empty choice list.
+            return "Unknown"
+        def check_uploaded_file(file):
+            """Handle an upload event: report a missing file, else classify it by type."""
+            # A cleared upload arrives as None, so skip the extension check.
+            return "No file uploaded" if file is None else check_file_type(file)
         def get_sheet_names(file):
             """Return the sheet names of the uploaded Excel workbook."""
             return pd.ExcelFile(file.name).sheet_names
             return gr.update(choices=sheets)
         def update_columns(file, sheet_name):
+            """Refresh the column selector for the current file and sheet.
+
+            Word uploads expose the single column 'original'; Excel uploads
+            expose the columns of the selected sheet; any other extension
+            yields the string "error".
+            """
+            suffix = os.path.splitext(file.name)[-1].lower()
+            if suffix in (".docx", ".doc"):
+                return gr.update(choices=['original'])
+            if suffix in (".xlsx", ".xls"):
+                return gr.update(choices=get_column_names(file, sheet_name))
+            return "error"
         def get_column_names(file, sheet_name):
             """Return the column labels of one sheet of the uploaded workbook as a list."""
             sheet_frame = pd.read_excel(file.name, sheet_name=sheet_name)
             return list(sheet_frame.columns)
+        excel_file.change(fn=check_uploaded_file, inputs=excel_file, outputs=sheet_name)
         sheet_name.change(fn=update_columns, inputs=[excel_file, sheet_name], outputs=column_name)
         model_choosing = gr.Dropdown(multiselect = False ,
         # Unified translation function
         def translate_excel(
+            file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key
+        ):
+            """Dispatch a translation request by file type and selected model.
+
+            Excel uploads go to ``chat_gpt_translate_excel`` and Word uploads to
+            ``chat_gpt_translate_word``; both receive the arguments unchanged
+            and their result is returned as-is.
+
+            Raises:
+                gr.Error: if no file was uploaded, the extension is not a
+                    supported Word/Excel type, or the selected model is not
+                    implemented yet.
+            """
+            if file is None:
+                raise gr.Error("Please upload a Word or Excel file first.")
+            suffix = os.path.splitext(file.name)[-1].lower()
+            if suffix in (".xlsx", ".xls"):
+                translate = chat_gpt_translate_excel
+            elif suffix in (".docx", ".doc"):
+                translate = chat_gpt_translate_word
+            else:
+                # Previously this case fell through and returned None silently.
+                raise gr.Error(f"Unsupported file type '{suffix}'. Upload a .docx, .doc, .xlsx or .xls file.")
+            if model != "ChatGPT (4o-mini)":
+                # Handle other models (currently in progress)
+                raise gr.Error("Translation with the selected model is not yet implemented.")
+            return translate(
+                file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key
+            )
         # Register button click
         translate_button.click(
             fn=translate_excel,
             ],
             outputs=output_file,
         )
+    iface.launch(debug=True, share=True,
+                 server_port= 7861,
                  server_name="0.0.0.0"
                  )