Petch DS committed on
Commit
e193988
·
1 Parent(s): 0a3b2e2

addWord_Inprogress1

Browse files
Files changed (4) hide show
  1. .DS_Store +0 -0
  2. translated_output.xlsx +0 -0
  3. translator_app.ipynb +142 -111
  4. translator_app.py +96 -20
.DS_Store ADDED
Binary file (6.15 kB). View file
 
translated_output.xlsx DELETED
Binary file (6.33 kB)
 
translator_app.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 33,
6
  "metadata": {
7
  "id": "dKoye1NqPPWX"
8
  },
@@ -20,7 +20,7 @@
20
  },
21
  {
22
  "cell_type": "code",
23
- "execution_count": 21,
24
  "metadata": {},
25
  "outputs": [],
26
  "source": [
@@ -30,77 +30,18 @@
30
  "from langchain_core.prompts import PromptTemplate\n",
31
  "from langchain_core.runnables import RunnableLambda\n",
32
  "import gradio as gr\n",
33
- "import pandas as pd\n",
34
- "from transformers import T5Tokenizer, T5ForConditionalGeneration\n",
35
- "import torch\n"
36
- ]
37
- },
38
- {
39
- "cell_type": "code",
40
- "execution_count": 22,
41
- "metadata": {},
42
- "outputs": [],
43
- "source": [
44
- "# from docx import Document\n",
45
- "\n",
46
- "# # โหลดไฟล์ Word\n",
47
- "# doc = Document('test_file.docx')\n",
48
- "\n",
49
- "# # อ่านทุก paragraph และแสดงเนื้อหา\n",
50
- "# for para in doc.paragraphs:\n",
51
- "# print(para.text)"
52
- ]
53
- },
54
- {
55
- "cell_type": "code",
56
- "execution_count": 23,
57
- "metadata": {},
58
- "outputs": [],
59
- "source": [
60
- "# # อ่านทุกตารางในเอกสาร\n",
61
- "# for table in doc.tables:\n",
62
- "# for row in table.rows:\n",
63
- "# for cell in row.cells:\n",
64
- "# print(cell.text)"
65
- ]
66
- },
67
- {
68
- "cell_type": "code",
69
- "execution_count": 24,
70
- "metadata": {},
71
- "outputs": [],
72
- "source": [
73
- "# for element in doc.element.body:\n",
74
- "# if element.tag.endswith('tbl'):\n",
75
- "# # ถ้าเป็นตาราง\n",
76
- "# print('Table found')\n",
77
- "# elif element.tag.endswith('p'):\n",
78
- "# # ถ้าเป็นพารากราฟ\n",
79
- "# print('Paragraph found')"
80
  ]
81
  },
82
  {
83
  "cell_type": "code",
84
- "execution_count": 25,
85
  "metadata": {},
86
  "outputs": [],
87
  "source": [
88
- "# # นับรูปภาพ\n",
89
- "# images = doc.inline_shapes\n",
90
- "# print(\"Found\", len(images), \"images\")\n",
91
- "\n",
92
- "# # ตัวอย่างวิธีดึงข้อมูลพื้นฐานของรูปภาพแต่ละรูป\n",
93
- "# for image in images:\n",
94
- "# print(\"Image size:\", image.width.pt, \"x\", image.height.pt) "
95
  ]
96
  },
97
- {
98
- "cell_type": "code",
99
- "execution_count": null,
100
- "metadata": {},
101
- "outputs": [],
102
- "source": []
103
- },
104
  {
105
  "cell_type": "markdown",
106
  "metadata": {},
@@ -110,7 +51,7 @@
110
  },
111
  {
112
  "cell_type": "code",
113
- "execution_count": 26,
114
  "metadata": {},
115
  "outputs": [],
116
  "source": [
@@ -121,7 +62,7 @@
121
  "cell_type": "markdown",
122
  "metadata": {},
123
  "source": [
124
- "# Process for Each Model"
125
  ]
126
  },
127
  {
@@ -133,10 +74,16 @@
133
  },
134
  {
135
  "cell_type": "code",
136
- "execution_count": 27,
137
  "metadata": {},
138
  "outputs": [],
139
  "source": [
 
 
 
 
 
 
140
  "\n",
141
  "def chat_gpt_4o_mini(api_key = None):\n",
142
  " model = ChatOpenAI(model_name=\"gpt-4o-mini\", api_key=api_key)\n",
@@ -158,23 +105,22 @@
158
  "\n",
159
  " chain = prompt | model | output_parser | RunnableLambda(get_class) \n",
160
  "\n",
161
- " return chain\n"
162
  ]
163
  },
164
  {
165
  "cell_type": "code",
166
- "execution_count": 28,
167
  "metadata": {},
168
  "outputs": [],
169
- "source": [
170
- "\n",
171
- "\n",
172
- "def using_model(chosen_model, api_key=None):\n",
173
- " if chosen_model == 'ChatGPT (4o-mini)':\n",
174
- " return chat_gpt_4o_mini(api_key=api_key)\n",
175
- " else:\n",
176
- " raise ValueError(\"Unsupported model selected\")\n"
177
- ]
178
  },
179
  {
180
  "cell_type": "markdown",
@@ -185,7 +131,7 @@
185
  },
186
  {
187
  "cell_type": "code",
188
- "execution_count": 35,
189
  "metadata": {
190
  "colab": {
191
  "base_uri": "https://localhost:8080/",
@@ -199,12 +145,18 @@
199
  "\n",
200
  "def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lang, where_to_place, keep_original, chosen_model, api_key = None, progress=gr.Progress()):\n",
201
  " if where_to_place is None:\n",
202
- " where_to_place = 'append_all (ต่อ column สุดท้าย)'\n",
203
  "\n",
204
  " model = using_model(chosen_model = chosen_model, api_key = api_key)\n",
205
  "\n",
206
- " df = pd.read_excel(file.name, sheet_name=sheet_name, header=0)\n",
207
- " # original columns = col_name + remain_col\n",
 
 
 
 
 
 
208
  " original_col = df.columns\n",
209
  " total_columns = len(df.columns)\n",
210
  " current_step = 0\n",
@@ -212,18 +164,16 @@
212
  " progress(0, desc=\"Starting translation process...\")\n",
213
  "\n",
214
  " # Automatically detect string columns if col_name is None\n",
215
- " # col_name is column we want to translate\n",
216
  " if col_name is None:\n",
217
  " col_name = [col for col in df.columns if df[col].dtype == 'object']\n",
218
  "\n",
219
  " # Determine columns that are not selected for translation\n",
220
- " # remain_col is column we do not want to translate\n",
221
  " remain_col = [col for col in df.columns if col not in col_name]\n",
222
  "\n",
223
  " # Dictionary to store unique values and their translations\n",
224
  " translation_map = {}\n",
225
  " trans_col_name = []\n",
226
- " print(col_name)\n",
227
  "\n",
228
  " # Process the selected columns for translation\n",
229
  " for idx, col in enumerate(col_name):\n",
@@ -253,14 +203,14 @@
253
  " print(f\"Error in column {col}: {e}\")\n",
254
  " continue\n",
255
  "\n",
256
- " # # Process remaining columns\n",
257
  " # for column in remain_col:\n",
258
  " # current_step += 1\n",
259
- " # progress(current_step / total_columns, desc=f\"Translating column name: {column} ({current_step}/{len(remain_col)})...\")\n",
260
  "\n",
261
  " # try:\n",
262
- " # # We do not translate remain_col which remaining col\n",
263
- " # # remain_col = chain.batch([{\"sentence\": column, \"source_lang\": source_lang, \"target_lang\": target_lang}])\n",
264
  " # name_col = column + '_translated' # Assuming the translation returns a list of translations\n",
265
  " # df.loc[:, name_col] = df.loc[:, column]\n",
266
  "\n",
@@ -268,19 +218,14 @@
268
  " # print(f\"Error in column {column}: {e}\")\n",
269
  " # continue\n",
270
  "\n",
271
- " \n",
272
- " output_file = f\"{file.name.split('.')[0]}_translated.xlsx\"\n",
273
  " if not os.path.exists(output_file):\n",
274
  " pd.DataFrame().to_excel(output_file, index=False)\n",
275
  "\n",
276
  " if keep_original == 'keep original':\n",
277
- " # have the all columns\n",
278
  " output_col = original_col\n",
279
  " else:\n",
280
- " # only translated column\n",
281
  " output_col = col_name\n",
282
  "\n",
283
- " \n",
284
  " try:\n",
285
  " if where_to_place == 'append_all (ต่อ column สุดท้าย)':\n",
286
  " final_cols = list(output_col) + [col for col in trans_col_name]\n",
@@ -328,6 +273,59 @@
328
  "\n"
329
  ]
330
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
  {
332
  "cell_type": "markdown",
333
  "metadata": {},
@@ -337,7 +335,7 @@
337
  },
338
  {
339
  "cell_type": "code",
340
- "execution_count": 36,
341
  "metadata": {
342
  "id": "x8Njoc4fROSp"
343
  },
@@ -346,7 +344,7 @@
346
  "name": "stdout",
347
  "output_type": "stream",
348
  "text": [
349
- "Running on local URL: http://127.0.0.1:7860\n",
350
  "\n",
351
  "To create a public link, set `share=True` in `launch()`.\n"
352
  ]
@@ -354,7 +352,7 @@
354
  {
355
  "data": {
356
  "text/html": [
357
- "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
358
  ],
359
  "text/plain": [
360
  "<IPython.core.display.HTML object>"
@@ -376,7 +374,6 @@
376
  "name": "stdout",
377
  "output_type": "stream",
378
  "text": [
379
- "['control type']\n",
380
  "Keyboard interruption in main thread... closing server.\n"
381
  ]
382
  },
@@ -384,7 +381,7 @@
384
  "data": {
385
  "text/plain": []
386
  },
387
- "execution_count": 36,
388
  "metadata": {},
389
  "output_type": "execute_result"
390
  }
@@ -413,6 +410,24 @@
413
  " 'translated_column']\n",
414
  " , interactive=True\n",
415
  " )\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
  " \n",
417
  " def get_sheet_names(file):\n",
418
  " xls = pd.ExcelFile(file.name)\n",
@@ -423,20 +438,25 @@
423
  " return gr.update(choices=sheets)\n",
424
  "\n",
425
  " def update_columns(file, sheet_name):\n",
426
- " columns = get_column_names(file, sheet_name)\n",
427
- " return gr.update(choices=columns)\n",
 
 
 
 
 
428
  "\n",
429
  " def get_column_names(file, sheet_name):\n",
430
  " dd = pd.read_excel(file.name, sheet_name=sheet_name)\n",
431
  " return list(dd.columns)\n",
432
  " \n",
433
  "\n",
434
- " excel_file.change(fn=update_sheets, inputs=excel_file, outputs=sheet_name)\n",
435
  " sheet_name.change(fn=update_columns, inputs=[excel_file, sheet_name], outputs=column_name)\n",
436
  "\n",
437
  " model_choosing = gr.Dropdown(multiselect = False , \n",
438
  " label = \"Choosing Model you want\", \n",
439
- " choices = ['ChatGPT (4o-mini)', 'DeepSeek (developing...)','another (In Progress)']\n",
440
  " , interactive=True\n",
441
  " )\n",
442
  "\n",
@@ -448,14 +468,25 @@
448
  " def translate_excel(\n",
449
  " file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key\n",
450
  " ):\n",
451
- " if model == \"ChatGPT (4o-mini)\":\n",
452
- " # Call ChatGPT-based translation\n",
453
- " return chat_gpt_translate_excel(\n",
454
- " file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key\n",
455
- " )\n",
 
 
 
 
 
 
 
 
 
 
 
 
456
  " else:\n",
457
- " # Handle other models (currently in progress)\n",
458
- " raise gr.Error(\"Translation with the selected model is not yet implemented.\")\n",
459
  "\n",
460
  " # Register button click\n",
461
  " translate_button.click(\n",
@@ -473,7 +504,7 @@
473
  " ],\n",
474
  " outputs=output_file,\n",
475
  " )\n",
476
- "iface.launch(debug=True)\n",
477
  "\n"
478
  ]
479
  },
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 34,
6
  "metadata": {
7
  "id": "dKoye1NqPPWX"
8
  },
 
20
  },
21
  {
22
  "cell_type": "code",
23
+ "execution_count": 35,
24
  "metadata": {},
25
  "outputs": [],
26
  "source": [
 
30
  "from langchain_core.prompts import PromptTemplate\n",
31
  "from langchain_core.runnables import RunnableLambda\n",
32
  "import gradio as gr\n",
33
+ "import pandas as pd"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  ]
35
  },
36
  {
37
  "cell_type": "code",
38
+ "execution_count": 36,
39
  "metadata": {},
40
  "outputs": [],
41
  "source": [
42
+ "from docx import Document"
 
 
 
 
 
 
43
  ]
44
  },
 
 
 
 
 
 
 
45
  {
46
  "cell_type": "markdown",
47
  "metadata": {},
 
51
  },
52
  {
53
  "cell_type": "code",
54
+ "execution_count": 37,
55
  "metadata": {},
56
  "outputs": [],
57
  "source": [
 
62
  "cell_type": "markdown",
63
  "metadata": {},
64
  "source": [
65
+ "# Process"
66
  ]
67
  },
68
  {
 
74
  },
75
  {
76
  "cell_type": "code",
77
+ "execution_count": 38,
78
  "metadata": {},
79
  "outputs": [],
80
  "source": [
81
+ "def using_model(chosen_model, api_key):\n",
82
+ " if chosen_model == 'ChatGPT (4o-mini)':\n",
83
+ " model = chat_gpt_4o_mini(api_key = api_key)\n",
84
+ " else:\n",
85
+ " pass\n",
86
+ " return model\n",
87
  "\n",
88
  "def chat_gpt_4o_mini(api_key = None):\n",
89
  " model = ChatOpenAI(model_name=\"gpt-4o-mini\", api_key=api_key)\n",
 
105
  "\n",
106
  " chain = prompt | model | output_parser | RunnableLambda(get_class) \n",
107
  "\n",
108
+ " return chain"
109
  ]
110
  },
111
  {
112
  "cell_type": "code",
113
+ "execution_count": null,
114
  "metadata": {},
115
  "outputs": [],
116
+ "source": []
117
+ },
118
+ {
119
+ "cell_type": "code",
120
+ "execution_count": null,
121
+ "metadata": {},
122
+ "outputs": [],
123
+ "source": []
 
124
  },
125
  {
126
  "cell_type": "markdown",
 
131
  },
132
  {
133
  "cell_type": "code",
134
+ "execution_count": 39,
135
  "metadata": {
136
  "colab": {
137
  "base_uri": "https://localhost:8080/",
 
145
  "\n",
146
  "def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lang, where_to_place, keep_original, chosen_model, api_key = None, progress=gr.Progress()):\n",
147
  " if where_to_place is None:\n",
148
+ " where_to_place = 'append_all'\n",
149
  "\n",
150
  " model = using_model(chosen_model = chosen_model, api_key = api_key)\n",
151
  "\n",
152
+ " if isinstance(file, pd.DataFrame):\n",
153
+ " df = file.copy()\n",
154
+ " output_file = f\"{file.name.unique()[0].split('.')[0]}_translated.xlsx\"\n",
155
+ " df = df.drop(columns=['name'])\n",
156
+ " else:\n",
157
+ " df = pd.read_excel(file.name, sheet_name=sheet_name, header=0)\n",
158
+ " output_file = f\"{file.name.split('.')[0]}_translated.xlsx\"\n",
159
+ "\n",
160
  " original_col = df.columns\n",
161
  " total_columns = len(df.columns)\n",
162
  " current_step = 0\n",
 
164
  " progress(0, desc=\"Starting translation process...\")\n",
165
  "\n",
166
  " # Automatically detect string columns if col_name is None\n",
 
167
  " if col_name is None:\n",
168
  " col_name = [col for col in df.columns if df[col].dtype == 'object']\n",
169
  "\n",
170
  " # Determine columns that are not selected for translation\n",
 
171
  " remain_col = [col for col in df.columns if col not in col_name]\n",
172
  "\n",
173
  " # Dictionary to store unique values and their translations\n",
174
  " translation_map = {}\n",
175
  " trans_col_name = []\n",
176
+ "\n",
177
  "\n",
178
  " # Process the selected columns for translation\n",
179
  " for idx, col in enumerate(col_name):\n",
 
203
  " print(f\"Error in column {col}: {e}\")\n",
204
  " continue\n",
205
  "\n",
206
+ " # Process remaining columns\n",
207
  " # for column in remain_col:\n",
208
  " # current_step += 1\n",
209
+ " # progress(current_step / total_columns, desc=f\"Translating column name: {column} ({current_step}/{total_columns})...\")\n",
210
  "\n",
211
  " # try:\n",
212
+ " # # We do not translate all_col which remaining col\n",
213
+ " # # all_col_translation = chain.batch([{\"sentence\": column, \"source_lang\": source_lang, \"target_lang\": target_lang}])\n",
214
  " # name_col = column + '_translated' # Assuming the translation returns a list of translations\n",
215
  " # df.loc[:, name_col] = df.loc[:, column]\n",
216
  "\n",
 
218
  " # print(f\"Error in column {column}: {e}\")\n",
219
  " # continue\n",
220
  "\n",
 
 
221
  " if not os.path.exists(output_file):\n",
222
  " pd.DataFrame().to_excel(output_file, index=False)\n",
223
  "\n",
224
  " if keep_original == 'keep original':\n",
 
225
  " output_col = original_col\n",
226
  " else:\n",
 
227
  " output_col = col_name\n",
228
  "\n",
 
229
  " try:\n",
230
  " if where_to_place == 'append_all (ต่อ column สุดท้าย)':\n",
231
  " final_cols = list(output_col) + [col for col in trans_col_name]\n",
 
273
  "\n"
274
  ]
275
  },
276
+ {
277
+ "cell_type": "code",
278
+ "execution_count": 40,
279
+ "metadata": {},
280
+ "outputs": [],
281
+ "source": [
282
+ "def extract_word_content_to_excel(file_path):\n",
283
+ " \"\"\" ดึงเนื้อหา + รูปภาพจากไฟล์ Word และบันทึกเป็น Excel \"\"\"\n",
284
+ " doc = Document(file_path)\n",
285
+ " \n",
286
+ " data = []\n",
287
+ " paragraph_count = 0\n",
288
+ "\n",
289
+ " for element in doc.element.body:\n",
290
+ " if element.tag.endswith(\"p\"): # Paragraph\n",
291
+ " paragraph_text = element.text.strip()\n",
292
+ " paragraph_count += 1\n",
293
+ " data.append([paragraph_count, paragraph_text]) # บันทึกพารากราฟ\n",
294
+ "\n",
295
+ " elif element.tag.endswith(\"tbl\"): # Table (ถ้ามี)\n",
296
+ " paragraph_count += 1\n",
297
+ " data.append([paragraph_count, \"[Table]\"])\n",
298
+ "\n",
299
+ " elif element.tag.endswith(\"drawing\"): # Image (รูปภาพ)\n",
300
+ " paragraph_count += 1\n",
301
+ " data.append([paragraph_count, \"[Image]\"])\n",
302
+ "\n",
303
+ " # สร้าง DataFrame\n",
304
+ " df = pd.DataFrame(data, columns=[\"paragraph\", \"original\"])\n",
305
+ " df['name'] = file_path.split('/')[-1]\n",
306
+ " return df\n",
307
+ "\n",
308
+ "def chat_gpt_translate_word(file, sheet_name, col_name, source_lang, target_lang, where_to_place, keep_original, chosen_model, api_key = None, progress=gr.Progress()):\n",
309
+ " word_to_excel_file = extract_word_content_to_excel(file)\n",
310
+ " return chat_gpt_translate_excel(word_to_excel_file, \n",
311
+ " sheet_name=\"Sheet1\", \n",
312
+ " col_name = ['original'], \n",
313
+ " source_lang = source_lang, \n",
314
+ " target_lang = target_lang, \n",
315
+ " where_to_place=\"append_all (ต่อ column สุดท้าย)\", \n",
316
+ " keep_original=\"keep original\", \n",
317
+ " chosen_model = chosen_model, \n",
318
+ " api_key = api_key\n",
319
+ " )"
320
+ ]
321
+ },
322
+ {
323
+ "cell_type": "code",
324
+ "execution_count": null,
325
+ "metadata": {},
326
+ "outputs": [],
327
+ "source": []
328
+ },
329
  {
330
  "cell_type": "markdown",
331
  "metadata": {},
 
335
  },
336
  {
337
  "cell_type": "code",
338
+ "execution_count": 41,
339
  "metadata": {
340
  "id": "x8Njoc4fROSp"
341
  },
 
344
  "name": "stdout",
345
  "output_type": "stream",
346
  "text": [
347
+ "Running on local URL: http://127.0.0.1:7861\n",
348
  "\n",
349
  "To create a public link, set `share=True` in `launch()`.\n"
350
  ]
 
352
  {
353
  "data": {
354
  "text/html": [
355
+ "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
356
  ],
357
  "text/plain": [
358
  "<IPython.core.display.HTML object>"
 
374
  "name": "stdout",
375
  "output_type": "stream",
376
  "text": [
 
377
  "Keyboard interruption in main thread... closing server.\n"
378
  ]
379
  },
 
381
  "data": {
382
  "text/plain": []
383
  },
384
+ "execution_count": 41,
385
  "metadata": {},
386
  "output_type": "execute_result"
387
  }
 
410
  " 'translated_column']\n",
411
  " , interactive=True\n",
412
  " )\n",
413
+ "\n",
414
+ " def check_file_type(file):\n",
415
+ " \"\"\" ตรวจสอบว่าไฟล์ที่อัปโหลดเป็น Word หรือ Excel \"\"\"\n",
416
+ " file_extension = os.path.splitext(file.name)[-1].lower()\n",
417
+ "\n",
418
+ " if file_extension in [\".docx\", \".doc\"]:\n",
419
+ " return gr.update(choices=['all paragraphs only', 'specified paragraph or page (Developing ...)'])\n",
420
+ " elif file_extension in [\".xlsx\", \".xls\"]:\n",
421
+ " return update_sheets(file)\n",
422
+ " else:\n",
423
+ " return \"Unknown\"\n",
424
+ " \n",
425
+ " def check_uploaded_file(file):\n",
426
+ " \"\"\" ฟังก์ชันรับไฟล์ที่อัปโหลด แล้วตรวจสอบประเภท \"\"\"\n",
427
+ " if file is None:\n",
428
+ " return \"No file uploaded\"\n",
429
+ " return check_file_type(file)\n",
430
+ "\n",
431
  " \n",
432
  " def get_sheet_names(file):\n",
433
  " xls = pd.ExcelFile(file.name)\n",
 
438
  " return gr.update(choices=sheets)\n",
439
  "\n",
440
  " def update_columns(file, sheet_name):\n",
441
+ " if os.path.splitext(file.name)[-1].lower() in [\".docx\", \".doc\"]:\n",
442
+ " return gr.update(choices=['original'])\n",
443
+ " elif os.path.splitext(file.name)[-1].lower() in [\".xlsx\", \".xls\"]:\n",
444
+ " columns = get_column_names(file, sheet_name)\n",
445
+ " return gr.update(choices=columns)\n",
446
+ " else:\n",
447
+ " return \"error\"\n",
448
  "\n",
449
  " def get_column_names(file, sheet_name):\n",
450
  " dd = pd.read_excel(file.name, sheet_name=sheet_name)\n",
451
  " return list(dd.columns)\n",
452
  " \n",
453
  "\n",
454
+ " excel_file.change(fn=check_uploaded_file, inputs=excel_file, outputs=sheet_name)\n",
455
  " sheet_name.change(fn=update_columns, inputs=[excel_file, sheet_name], outputs=column_name)\n",
456
  "\n",
457
  " model_choosing = gr.Dropdown(multiselect = False , \n",
458
  " label = \"Choosing Model you want\", \n",
459
+ " choices = ['ChatGPT (4o-mini)', 'Deepseek (developing ...)', 'another (In Progress)']\n",
460
  " , interactive=True\n",
461
  " )\n",
462
  "\n",
 
468
  " def translate_excel(\n",
469
  " file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key\n",
470
  " ):\n",
471
+ " if os.path.splitext(file.name)[-1].lower() in [\".xlsx\", \".xls\"]:\n",
472
+ " if model == \"ChatGPT (4o-mini)\":\n",
473
+ " # Call ChatGPT-based translation\n",
474
+ " return chat_gpt_translate_excel(\n",
475
+ " file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key\n",
476
+ " )\n",
477
+ " else:\n",
478
+ " # Handle other models (currently in progress)\n",
479
+ " raise gr.Error(\"Translation with the selected model is not yet implemented.\")\n",
480
+ " elif os.path.splitext(file.name)[-1].lower() in [\".docx\", \".doc\"]:\n",
481
+ " if model == \"ChatGPT (4o-mini)\":\n",
482
+ " # Call ChatGPT-based translation\n",
483
+ " return chat_gpt_translate_word(file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key)\n",
484
+ " else:\n",
485
+ " # Handle other models (currently in progress)\n",
486
+ " raise gr.Error(\"Translation with the selected model is not yet implemented.\")\n",
487
+ " \n",
488
  " else:\n",
489
+ " print('No Type of Input Supported')\n",
 
490
  "\n",
491
  " # Register button click\n",
492
  " translate_button.click(\n",
 
504
  " ],\n",
505
  " outputs=output_file,\n",
506
  " )\n",
507
+ "iface.launch(debug=True, server_port= 7861)\n",
508
  "\n"
509
  ]
510
  },
translator_app.py CHANGED
@@ -6,7 +6,7 @@ from langchain_core.prompts import PromptTemplate
6
  from langchain_core.runnables import RunnableLambda
7
  import gradio as gr
8
  import pandas as pd
9
-
10
 
11
  def using_model(chosen_model, api_key):
12
  if chosen_model == 'ChatGPT (4o-mini)':
@@ -44,7 +44,14 @@ def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lan
44
 
45
  model = using_model(chosen_model = chosen_model, api_key = api_key)
46
 
47
- df = pd.read_excel(file.name, sheet_name=sheet_name, header=0)
 
 
 
 
 
 
 
48
  original_col = df.columns
49
  total_columns = len(df.columns)
50
  current_step = 0
@@ -105,8 +112,6 @@ def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lan
105
  # print(f"Error in column {column}: {e}")
106
  # continue
107
 
108
-
109
- output_file = f"{file.name}_translated.xlsx"
110
  if not os.path.exists(output_file):
111
  pd.DataFrame().to_excel(output_file, index=False)
112
 
@@ -159,8 +164,47 @@ def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lan
159
  progress(1.0, desc="Completed all tasks!")
160
  return output_file
161
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
  if __name__ == "__main__" :
 
164
  with gr.Blocks() as iface:
165
  gr.Markdown("## Excel Translation Interface")
166
 
@@ -184,7 +228,23 @@ if __name__ == "__main__" :
184
  'translated_column']
185
  , interactive=True
186
  )
187
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  def get_sheet_names(file):
189
  xls = pd.ExcelFile(file.name)
190
  return xls.sheet_names
@@ -194,15 +254,20 @@ if __name__ == "__main__" :
194
  return gr.update(choices=sheets)
195
 
196
  def update_columns(file, sheet_name):
197
- columns = get_column_names(file, sheet_name)
198
- return gr.update(choices=columns)
 
 
 
 
 
199
 
200
  def get_column_names(file, sheet_name):
201
  dd = pd.read_excel(file.name, sheet_name=sheet_name)
202
  return list(dd.columns)
203
 
204
 
205
- excel_file.change(fn=update_sheets, inputs=excel_file, outputs=sheet_name)
206
  sheet_name.change(fn=update_columns, inputs=[excel_file, sheet_name], outputs=column_name)
207
 
208
  model_choosing = gr.Dropdown(multiselect = False ,
@@ -217,17 +282,24 @@ if __name__ == "__main__" :
217
 
218
  # Unified translation function
219
  def translate_excel(
220
- file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key
221
- ):
222
- if model == "ChatGPT (4o-mini)":
223
- # Call ChatGPT-based translation
224
- return chat_gpt_translate_excel(
225
- file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key
226
- )
227
- else:
228
- # Handle other models (currently in progress)
229
- raise gr.Error("Translation with the selected model is not yet implemented.")
230
-
 
 
 
 
 
 
 
231
  # Register button click
232
  translate_button.click(
233
  fn=translate_excel,
@@ -244,6 +316,10 @@ if __name__ == "__main__" :
244
  ],
245
  outputs=output_file,
246
  )
247
- iface.launch(debug=True, share=True, server_port= 7860,
 
 
 
 
248
  server_name="0.0.0.0"
249
  )
 
6
  from langchain_core.runnables import RunnableLambda
7
  import gradio as gr
8
  import pandas as pd
9
+ from docx import Document
10
 
11
  def using_model(chosen_model, api_key):
12
  if chosen_model == 'ChatGPT (4o-mini)':
 
44
 
45
  model = using_model(chosen_model = chosen_model, api_key = api_key)
46
 
47
+ if isinstance(file, pd.DataFrame):
48
+ df = file.copy()
49
+ output_file = f"{file.name.unique()[0].split('.')[0]}_translated.xlsx"
50
+ df = df.drop(columns=['name'])
51
+ else:
52
+ df = pd.read_excel(file.name, sheet_name=sheet_name, header=0)
53
+ output_file = f"{file.name.split('.')[0]}_translated.xlsx"
54
+
55
  original_col = df.columns
56
  total_columns = len(df.columns)
57
  current_step = 0
 
112
  # print(f"Error in column {column}: {e}")
113
  # continue
114
 
 
 
115
  if not os.path.exists(output_file):
116
  pd.DataFrame().to_excel(output_file, index=False)
117
 
 
164
  progress(1.0, desc="Completed all tasks!")
165
  return output_file
166
 
167
def extract_word_content_to_excel(file_path):
    """Extract the top-level body content of a Word document into a DataFrame.

    Every direct child of the document body that is recognised becomes one
    row, numbered in document order: paragraphs contribute their stripped
    text, tables the placeholder ``"[Table]"`` and drawings the placeholder
    ``"[Image]"``. Other body children are ignored.

    Args:
        file_path: Path to the document, or an uploaded-file object whose
            ``name`` attribute holds that path.

    Returns:
        A pandas DataFrame with the columns ``paragraph`` (1-based position),
        ``original`` (text or placeholder) and ``name`` (the document's file
        name, repeated on every row).
    """
    # Local imports keep this block self-contained.
    import os
    from docx.text.paragraph import Paragraph

    # The Word pipeline hands over the uploaded-file object rather than a
    # plain string, so resolve the real path before doing any path handling.
    if isinstance(file_path, (str, os.PathLike)):
        path = os.fspath(file_path)
    else:
        path = file_path.name

    doc = Document(path)

    rows = []
    for element in doc.element.body:
        tag = element.tag
        if not isinstance(tag, str):
            # XML comments / processing instructions carry no element name.
            continue

        # Compare the namespace-free tag exactly instead of by suffix.
        local_tag = tag.rsplit("}", 1)[-1]
        if local_tag == "p":
            # Go through the Paragraph proxy: the raw XML element is not
            # guaranteed to expose the run text through ``.text``.
            content = Paragraph(element, doc).text.strip()
        elif local_tag == "tbl":
            content = "[Table]"
        elif local_tag == "drawing":
            # NOTE(review): drawings normally sit inside paragraph runs, not
            # directly under the body, so this branch may never match - confirm.
            content = "[Image]"
        else:
            continue

        rows.append([len(rows) + 1, content])

    df = pd.DataFrame(rows, columns=["paragraph", "original"])
    # basename() copes with either path separator, unlike split('/').
    df["name"] = os.path.basename(path)
    return df
192
+
193
def chat_gpt_translate_word(file, sheet_name, col_name, source_lang, target_lang, where_to_place, keep_original, chosen_model, api_key = None, progress=gr.Progress()):
    """Translate a Word document by routing it through the Excel pipeline.

    The document is first flattened into a DataFrame by
    ``extract_word_content_to_excel`` and then passed to
    ``chat_gpt_translate_excel``, translating only the ``original`` column.

    Args:
        file: The uploaded Word document.
        sheet_name: Accepted for signature parity with the Excel translator;
            ignored, ``"Sheet1"`` is always used.
        col_name: Ignored; ``['original']`` is always translated.
        source_lang: Source language, forwarded unchanged.
        target_lang: Target language, forwarded unchanged.
        where_to_place: Ignored; translations are always appended after the
            last column.
        keep_original: Ignored; the original columns are always kept.
        chosen_model: Model identifier, forwarded unchanged.
        api_key: API key, forwarded unchanged.
        progress: Progress tracker, forwarded so the caller's tracker is the
            one that gets updated.

    Returns:
        Whatever ``chat_gpt_translate_excel`` returns.
    """
    paragraphs_df = extract_word_content_to_excel(file)
    return chat_gpt_translate_excel(
        paragraphs_df,
        sheet_name="Sheet1",
        col_name=['original'],
        source_lang=source_lang,
        target_lang=target_lang,
        where_to_place="append_all (ต่อ column สุดท้าย)",
        keep_original="keep original",
        chosen_model=chosen_model,
        api_key=api_key,
        # Previously dropped: the tracker passed to this wrapper never
        # reached the function that actually reports progress.
        progress=progress,
    )
205
 
206
  if __name__ == "__main__" :
207
+
208
  with gr.Blocks() as iface:
209
  gr.Markdown("## Excel Translation Interface")
210
 
 
228
  'translated_column']
229
  , interactive=True
230
  )
231
+ def check_file_type(file):
232
+ """ ตรวจสอบว่าไฟล์ที่อัปโหลดเป็น Word หรือ Excel """
233
+ file_extension = os.path.splitext(file.name)[-1].lower()
234
+
235
+ if file_extension in [".docx", ".doc"]:
236
+ return gr.update(choices=['all paragraphs only', 'specified paragraph or page (Developing ...)'])
237
+ elif file_extension in [".xlsx", ".xls"]:
238
+ return update_sheets(file)
239
+ else:
240
+ return "Unknown"
241
+
242
+ def check_uploaded_file(file):
243
+ """ ฟังก์ชันรับไฟล์ที่อัปโหลด แล้วตรวจสอบประเภท """
244
+ if file is None:
245
+ return "No file uploaded"
246
+ return check_file_type(file)
247
+
248
  def get_sheet_names(file):
249
  xls = pd.ExcelFile(file.name)
250
  return xls.sheet_names
 
254
  return gr.update(choices=sheets)
255
 
256
  def update_columns(file, sheet_name):
257
+ if os.path.splitext(file.name)[-1].lower() in [".docx", ".doc"]:
258
+ return gr.update(choices=['original'])
259
+ elif os.path.splitext(file.name)[-1].lower() in [".xlsx", ".xls"]:
260
+ columns = get_column_names(file, sheet_name)
261
+ return gr.update(choices=columns)
262
+ else:
263
+ return "error"
264
 
265
  def get_column_names(file, sheet_name):
266
  dd = pd.read_excel(file.name, sheet_name=sheet_name)
267
  return list(dd.columns)
268
 
269
 
270
+ excel_file.change(fn=check_uploaded_file, inputs=excel_file, outputs=sheet_name)
271
  sheet_name.change(fn=update_columns, inputs=[excel_file, sheet_name], outputs=column_name)
272
 
273
  model_choosing = gr.Dropdown(multiselect = False ,
 
282
 
283
  # Unified translation function
284
  def translate_excel(
285
+ file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key
286
+ ):
287
+ if os.path.splitext(file.name)[-1].lower() in [".xlsx", ".xls"]:
288
+ if model == "ChatGPT (4o-mini)":
289
+ # Call ChatGPT-based translation
290
+ return chat_gpt_translate_excel(
291
+ file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key
292
+ )
293
+ else:
294
+ # Handle other models (currently in progress)
295
+ raise gr.Error("Translation with the selected model is not yet implemented.")
296
+ elif os.path.splitext(file.name)[-1].lower() in [".docx", ".doc"]:
297
+ if model == "ChatGPT (4o-mini)":
298
+ # Call ChatGPT-based translation
299
+ return chat_gpt_translate_word(file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key)
300
+ else:
301
+ # Handle other models (currently in progress)
302
+ raise gr.Error("Translation with the selected model is not yet implemented.")
303
  # Register button click
304
  translate_button.click(
305
  fn=translate_excel,
 
316
  ],
317
  outputs=output_file,
318
  )
319
+
320
+
321
+
322
+ iface.launch(debug=True, share=True,
323
+ server_port= 7861,
324
  server_name="0.0.0.0"
325
  )