Petch DS committed on
Commit
632d5cf
·
1 Parent(s): 89740be

Fix rebase conflict

Browse files
Files changed (5) hide show
  1. Dockerfile +25 -0
  2. requirements.txt +10 -1
  3. translated_output.xlsx +0 -0
  4. translator_app.ipynb +422 -0
  5. translator_app.py +236 -0
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Dockerfile for Translator App
2
+
3
+ # Use an official Python runtime as a parent image
4
+ FROM python:3.9-slim
5
+
6
+ # Set the working directory in the container
7
+ WORKDIR /app
8
+
9
+ # Copy the requirements file and application files into the container
10
+ COPY requirements.txt ./
11
+ COPY translator_app.py ./Translator/
12
+
13
+ # Install Python dependencies
14
+ RUN pip install --no-cache-dir -r requirements.txt
15
+
16
+ # Install Jupyter and necessary extensions
17
+ # RUN pip install jupyter jupyter-server jupyterlab
18
+
19
+ # Expose the port the Gradio application listens on
20
+ EXPOSE 7860
21
+
22
+ # Run Jupyter Notebook
23
+ # CMD ["jupyter", "notebook", "./Translator/translator_app.ipynb", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root"]
24
+ # Run Gradio application
25
+ CMD ["python", "/app/Translator/translator_app.py"]
requirements.txt CHANGED
@@ -1 +1,10 @@
1
- huggingface_hub==0.25.2
 
 
 
 
 
 
 
 
 
 
1
+ huggingface_hub==0.25.2
2
+ gradio==4.44.0
3
+ langchain-openai
4
+ xlsxwriter==3.2.0
5
+ pandas==2.0.3
6
+ numpy==1.24.3
7
+ openpyxl==3.1.5
translated_output.xlsx ADDED
Binary file (6.33 kB). View file
 
translator_app.ipynb ADDED
@@ -0,0 +1,422 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 33,
6
+ "metadata": {
7
+ "id": "dKoye1NqPPWX"
8
+ },
9
+ "outputs": [],
10
+ "source": [
11
+ "# pip install -q -U gradio langchain-openai xlsxwriter"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "markdown",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Library Import\n"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 2,
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "import os\n",
28
+ "from langchain_openai import ChatOpenAI\n",
29
+ "from langchain_core.output_parsers import JsonOutputParser\n",
30
+ "from langchain_core.prompts import PromptTemplate\n",
31
+ "from langchain_core.runnables import RunnableLambda\n",
32
+ "import gradio as gr\n",
33
+ "import pandas as pd"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "markdown",
38
+ "metadata": {},
39
+ "source": [
40
+ "# API Key"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": 2,
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "# ..."
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "markdown",
54
+ "metadata": {},
55
+ "source": [
56
+ "# Process"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "markdown",
61
+ "metadata": {},
62
+ "source": [
63
+ "## Using ChatGPT-4o-mini"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 3,
69
+ "metadata": {},
70
+ "outputs": [],
71
+ "source": [
72
+ "def using_model(chosen_model, api_key):\n",
73
+ " if chosen_model == 'ChatGPT (4o-mini)':\n",
74
+ " model = chat_gpt_4o_mini(api_key = api_key)\n",
75
+ " else:\n",
76
+ " pass\n",
77
+ " return model\n",
78
+ "\n",
79
+ "def chat_gpt_4o_mini(api_key = None):\n",
80
+ " model = ChatOpenAI(model_name=\"gpt-4o-mini\", api_key=api_key)\n",
81
+ "\n",
82
+ " str_prompt =\"\"\"\n",
83
+ " You will be provided with a sentence in {source_lang}, and your task is to translate it into {target_lang}.\n",
84
+ " Answer in Json format with key 'translated'\n",
85
+ " Sentence: {sentence}\n",
86
+ " \"\"\"\n",
87
+ "\n",
88
+ " output_parser = JsonOutputParser()\n",
89
+ " prompt = PromptTemplate(\n",
90
+ " template = str_prompt,\n",
91
+ " input_variables=[\"source_lang\",\"target_lang\",\"sentence\"],\n",
92
+ " partial_variables={\"format_instructions\": output_parser.get_format_instructions()}\n",
93
+ " )\n",
94
+ " def get_class(x:dict)->str:\n",
95
+ " return x[\"translated\"]\n",
96
+ "\n",
97
+ " chain = prompt | model | output_parser | RunnableLambda(get_class) \n",
98
+ "\n",
99
+ " return chain"
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "markdown",
104
+ "metadata": {},
105
+ "source": [
106
+ "## Translate (excel) for Chat GPT"
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "code",
111
+ "execution_count": null,
112
+ "metadata": {
113
+ "colab": {
114
+ "base_uri": "https://localhost:8080/",
115
+ "height": 682
116
+ },
117
+ "id": "-0K4um1jPEk4",
118
+ "outputId": "9fc1316b-36db-47e8-a3c1-fa85953b1524"
119
+ },
120
+ "outputs": [],
121
+ "source": [
122
+ "\n",
123
+ "def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lang, where_to_place, keep_original, chosen_model, api_key = None, progress=gr.Progress()):\n",
124
+ " if where_to_place is None:\n",
125
+ " where_to_place = 'append_all'\n",
126
+ "\n",
127
+ " model = using_model(chosen_model = chosen_model, api_key = api_key)\n",
128
+ "\n",
129
+ " df = pd.read_excel(file.name, sheet_name=sheet_name, header=0)\n",
130
+ " original_col = df.columns\n",
131
+ " total_columns = len(df.columns)\n",
132
+ " current_step = 0\n",
133
+ "\n",
134
+ " progress(0, desc=\"Starting translation process...\")\n",
135
+ "\n",
136
+ " # Automatically detect string columns if col_name is None\n",
137
+ " if col_name is None:\n",
138
+ " col_name = [col for col in df.columns if df[col].dtype == 'object']\n",
139
+ "\n",
140
+ " # Determine columns that are not selected for translation\n",
141
+ " all_col = [col for col in df.columns if col not in col_name]\n",
142
+ "\n",
143
+ " # Dictionary to store unique values and their translations\n",
144
+ " translation_map = {}\n",
145
+ " print(col_name)\n",
146
+ "\n",
147
+ " # Process the selected columns for translation\n",
148
+ " for idx, col in enumerate(col_name):\n",
149
+ " current_step += 1\n",
150
+ " progress(current_step / total_columns, desc=f\"Translating {col} ({current_step}/{total_columns})...\")\n",
151
+ "\n",
152
+ " try:\n",
153
+ " # Extract unique values from the column\n",
154
+ " unique_values = df[col].dropna().unique()\n",
155
+ " unique_values = list(set(unique_values)) # Ensure uniqueness\n",
156
+ "\n",
157
+ " # Prepare data for translation\n",
158
+ " zh_sentence = [{\"sentence\": value, \"source_lang\": source_lang, \"target_lang\": target_lang} for value in unique_values]\n",
159
+ "\n",
160
+ " # Translate unique values\n",
161
+ " answers = model.batch(zh_sentence, config={\"max_concurrency\": 3})\n",
162
+ " \n",
163
+ " # Create a mapping from original values to translated values\n",
164
+ " translations = dict(zip(unique_values, answers))\n",
165
+ " translation_map[col] = translations\n",
166
+ "\n",
167
+ " # Map translations back to the original DataFrame\n",
168
+ " df[col + \"_translated\"] = df[col].map(translations).fillna(df[col])\n",
169
+ "\n",
170
+ " except Exception as e:\n",
171
+ " print(f\"Error in column {col}: {e}\")\n",
172
+ " continue\n",
173
+ "\n",
174
+ " # Process remaining columns\n",
175
+ " for column in all_col:\n",
176
+ " current_step += 1\n",
177
+ " progress(current_step / total_columns, desc=f\"Translating column name: {column} ({current_step}/{total_columns})...\")\n",
178
+ "\n",
179
+ " try:\n",
180
+ " # We do not translate all_col which remaining col\n",
181
+ " # all_col_translation = chain.batch([{\"sentence\": column, \"source_lang\": source_lang, \"target_lang\": target_lang}])\n",
182
+ " name_col = column + '_translated' # Assuming the translation returns a list of translations\n",
183
+ " df.loc[:, name_col] = df.loc[:, column]\n",
184
+ "\n",
185
+ " except Exception as e:\n",
186
+ " print(f\"Error in column {column}: {e}\")\n",
187
+ " continue\n",
188
+ "\n",
189
+ " \n",
190
+ " output_file = \"translated_output.xlsx\"\n",
191
+ " if not os.path.exists(output_file):\n",
192
+ " pd.DataFrame().to_excel(output_file, index=False)\n",
193
+ "\n",
194
+ " if keep_original == 'keep original':\n",
195
+ " output_col = original_col\n",
196
+ " else:\n",
197
+ " output_col = col_name\n",
198
+ "\n",
199
+ " \n",
200
+ " try:\n",
201
+ " if where_to_place == 'append_all (ต่อ column สุดท้าย)':\n",
202
+ " final_cols = list(output_col) + [col + '_translated' for col in output_col]\n",
203
+ " result = df[final_cols]\n",
204
+ " result.to_excel(output_file, index=False)\n",
205
+ " elif where_to_place == 'append_compare (เปรียบเทียบ column by column)':\n",
206
+ " final_cols = []\n",
207
+ " for col in output_col:\n",
208
+ " final_cols = final_cols + [col, col + '_translated']\n",
209
+ " result = df[final_cols]\n",
210
+ " result.to_excel(output_file, index=False)\n",
211
+ " elif where_to_place == 'replace':\n",
212
+ " final_cols = [col + '_translated' for col in output_col] \n",
213
+ " result = df[final_cols]\n",
214
+ " result.to_excel(output_file, index=False)\n",
215
+ " elif where_to_place == 'new_sheet':\n",
216
+ " final_cols = [col for col in output_col]\n",
217
+ " new_tab_cols = [col + '_translated' for col in output_col]\n",
218
+ "\n",
219
+ " result = df[final_cols]\n",
220
+ " result1 = df[new_tab_cols]\n",
221
+ " # Use ExcelWriter to write multiple sheets\n",
222
+ " with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:\n",
223
+ " result.to_excel(writer, sheet_name=sheet_name, index=False) # First sheet\n",
224
+ " result1.to_excel(writer, sheet_name=f'{sheet_name}_translated', index=False) # Second sheet\n",
225
+ "\n",
226
+ " progress(1.0, desc=\"Saving translated file... Completed!\")\n",
227
+ " except Exception as e:\n",
228
+ " print(f\"Error saving the file: {e}\")\n",
229
+ " raise gr.Error(f\"Error saving the file: {e}\")\n",
230
+ "\n",
231
+ " progress(1.0, desc=\"Completed all tasks!\")\n",
232
+ " return output_file\n",
233
+ "\n"
234
+ ]
235
+ },
236
+ {
237
+ "cell_type": "markdown",
238
+ "metadata": {},
239
+ "source": [
240
+ "## Main function\n"
241
+ ]
242
+ },
243
+ {
244
+ "cell_type": "code",
245
+ "execution_count": null,
246
+ "metadata": {
247
+ "id": "x8Njoc4fROSp"
248
+ },
249
+ "outputs": [
250
+ {
251
+ "name": "stdout",
252
+ "output_type": "stream",
253
+ "text": [
254
+ "Running on local URL: http://127.0.0.1:7860\n",
255
+ "\n",
256
+ "To create a public link, set `share=True` in `launch()`.\n"
257
+ ]
258
+ },
259
+ {
260
+ "data": {
261
+ "text/html": [
262
+ "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
263
+ ],
264
+ "text/plain": [
265
+ "<IPython.core.display.HTML object>"
266
+ ]
267
+ },
268
+ "metadata": {},
269
+ "output_type": "display_data"
270
+ },
271
+ {
272
+ "name": "stderr",
273
+ "output_type": "stream",
274
+ "text": [
275
+ "/Users/petchakrit_pinyopawasutthi/anaconda3/lib/python3.11/site-packages/gradio/analytics.py:106: UserWarning: IMPORTANT: You are using gradio version 4.44.0, however version 4.44.1 is available, please upgrade. \n",
276
+ "--------\n",
277
+ " warnings.warn(\n"
278
+ ]
279
+ },
280
+ {
281
+ "name": "stdout",
282
+ "output_type": "stream",
283
+ "text": [
284
+ "['Thai', 'English', 'ABC']\n",
285
+ "Keyboard interruption in main thread... closing server.\n"
286
+ ]
287
+ },
288
+ {
289
+ "data": {
290
+ "text/plain": []
291
+ },
292
+ "execution_count": 31,
293
+ "metadata": {},
294
+ "output_type": "execute_result"
295
+ }
296
+ ],
297
+ "source": [
298
+ "with gr.Blocks() as iface:\n",
299
+ " gr.Markdown(\"## Excel Translation Interface\")\n",
300
+ "\n",
301
+ " excel_file = gr.File(label=\"Upload Excel File\")\n",
302
+ " sheet_name = gr.Dropdown(label=\"Select Sheet\", interactive=True)\n",
303
+ " column_name= gr.Dropdown(label = \"Select Column to Translate (Not require)\", multiselect=True, interactive=True)\n",
304
+ " \n",
305
+ " with gr.Row():\n",
306
+ " source_language = gr.Textbox(label=\"Source Language Code\")\n",
307
+ " target_language = gr.Textbox(label=\"Target Language Code\")\n",
308
+ " with gr.Row():\n",
309
+ " where_to_place = gr.Dropdown(multiselect=False ,label=\"How translated columns should be placed\"\n",
310
+ " , choices = ['replace', \n",
311
+ " 'append_all (ต่อ column สุดท้าย)', \n",
312
+ " 'append_compare (เปรียบเทียบ column by column)', \n",
313
+ " 'new_sheet']\n",
314
+ " , interactive=True\n",
315
+ " )\n",
316
+ " keep_original = gr.Dropdown(multiselect=False ,label=\"You want to keep original column or just only the translated column\"\n",
317
+ " , choices = ['keep original', \n",
318
+ " 'translated_column']\n",
319
+ " , interactive=True\n",
320
+ " )\n",
321
+ " \n",
322
+ " def get_sheet_names(file):\n",
323
+ " xls = pd.ExcelFile(file.name)\n",
324
+ " return xls.sheet_names\n",
325
+ "\n",
326
+ " def update_sheets(file):\n",
327
+ " sheets = get_sheet_names(file)\n",
328
+ " return gr.update(choices=sheets)\n",
329
+ "\n",
330
+ " def update_columns(file, sheet_name):\n",
331
+ " columns = get_column_names(file, sheet_name)\n",
332
+ " return gr.update(choices=columns)\n",
333
+ "\n",
334
+ " def get_column_names(file, sheet_name):\n",
335
+ " dd = pd.read_excel(file.name, sheet_name=sheet_name)\n",
336
+ " return list(dd.columns)\n",
337
+ " \n",
338
+ "\n",
339
+ " excel_file.change(fn=update_sheets, inputs=excel_file, outputs=sheet_name)\n",
340
+ " sheet_name.change(fn=update_columns, inputs=[excel_file, sheet_name], outputs=column_name)\n",
341
+ "\n",
342
+ " model_choosing = gr.Dropdown(multiselect = False , \n",
343
+ " label = \"Choosing Model you want\", \n",
344
+ " choices = ['ChatGPT (4o-mini)', 'another (In Progress)']\n",
345
+ " , interactive=True\n",
346
+ " )\n",
347
+ "\n",
348
+ " needed_require = gr.Textbox(label=\"API Key(require if Chatgpt)\")\n",
349
+ " translate_button = gr.Button(\"Translate\")\n",
350
+ " output_file = gr.File(label=\"Download Translated Excel File\", interactive=True)\n",
351
+ "\n",
352
+ " # Unified translation function\n",
353
+ " def translate_excel(\n",
354
+ " file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key\n",
355
+ " ):\n",
356
+ " if model == \"ChatGPT (4o-mini)\":\n",
357
+ " # Call ChatGPT-based translation\n",
358
+ " return chat_gpt_translate_excel(\n",
359
+ " file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key\n",
360
+ " )\n",
361
+ " else:\n",
362
+ " # Handle other models (currently in progress)\n",
363
+ " raise gr.Error(\"Translation with the selected model is not yet implemented.\")\n",
364
+ "\n",
365
+ " # Register button click\n",
366
+ " translate_button.click(\n",
367
+ " fn=translate_excel,\n",
368
+ " inputs=[\n",
369
+ " excel_file,\n",
370
+ " sheet_name,\n",
371
+ " column_name,\n",
372
+ " source_language,\n",
373
+ " target_language,\n",
374
+ " where_to_place,\n",
375
+ " keep_original,\n",
376
+ " model_choosing,\n",
377
+ " needed_require,\n",
378
+ " ],\n",
379
+ " outputs=output_file,\n",
380
+ " )\n",
381
+ "iface.launch(debug=True)\n",
382
+ "\n"
383
+ ]
384
+ },
385
+ {
386
+ "cell_type": "code",
387
+ "execution_count": null,
388
+ "metadata": {},
389
+ "outputs": [],
390
+ "source": []
391
+ }
392
+ ],
393
+ "metadata": {
394
+ "colab": {
395
+ "provenance": [
396
+ {
397
+ "file_id": "1SaYuZQocnldkcDTIWwqSYfInBXiPqNbN",
398
+ "timestamp": 1727236548844
399
+ }
400
+ ]
401
+ },
402
+ "kernelspec": {
403
+ "display_name": "base",
404
+ "language": "python",
405
+ "name": "python3"
406
+ },
407
+ "language_info": {
408
+ "codemirror_mode": {
409
+ "name": "ipython",
410
+ "version": 3
411
+ },
412
+ "file_extension": ".py",
413
+ "mimetype": "text/x-python",
414
+ "name": "python",
415
+ "nbconvert_exporter": "python",
416
+ "pygments_lexer": "ipython3",
417
+ "version": "3.11.5"
418
+ }
419
+ },
420
+ "nbformat": 4,
421
+ "nbformat_minor": 0
422
+ }
translator_app.py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import os
3
+ from langchain_openai import ChatOpenAI
4
+ from langchain_core.output_parsers import JsonOutputParser
5
+ from langchain_core.prompts import PromptTemplate
6
+ from langchain_core.runnables import RunnableLambda
7
+ import gradio as gr
8
+ import pandas as pd
9
+
10
+
11
def using_model(chosen_model, api_key):
    """Return the translation chain for the model chosen in the UI.

    Args:
        chosen_model: Label of the selected model. Only
            'ChatGPT (4o-mini)' is handled.
        api_key: API key forwarded to ``chat_gpt_4o_mini``.

    Returns:
        Whatever ``chat_gpt_4o_mini`` returns for the given key.

    Raises:
        ValueError: If ``chosen_model`` is not a supported label.
    """
    if chosen_model == 'ChatGPT (4o-mini)':
        return chat_gpt_4o_mini(api_key=api_key)

    # Fail loudly with a readable message instead of falling through to a
    # `return` of a local that was never assigned (UnboundLocalError).
    raise ValueError(f"Unsupported model: {chosen_model!r}")
17
+
18
def chat_gpt_4o_mini(api_key=None):
    """Build the translation chain backed by the ``gpt-4o-mini`` chat model.

    The chain is ``prompt | model | JSON parser | extractor``. It is invoked
    with a dict holding ``source_lang``, ``target_lang`` and ``sentence`` and
    yields the value stored under the ``translated`` key of the parsed answer.

    Args:
        api_key: API key handed to ``ChatOpenAI``. An empty string is
            treated the same as ``None``.

    Returns:
        The composed runnable chain.
    """
    # A blank textbox produces "", which would be sent as a literal (invalid)
    # key. Normalise to None so the client can use its own fallback
    # (presumably the OPENAI_API_KEY environment variable -- confirm).
    model = ChatOpenAI(model_name="gpt-4o-mini", api_key=api_key or None)

    output_parser = JsonOutputParser()

    # {format_instructions} has to appear in the template; it was registered
    # as a partial variable but never referenced, so the parser's instructions
    # were never part of the prompt sent to the model.
    str_prompt = """
    You will be provided with a sentence in {source_lang}, and your task is to translate it into {target_lang}.
    Answer in Json format with key 'translated'
    {format_instructions}
    Sentence: {sentence}
    """

    prompt = PromptTemplate(
        template=str_prompt,
        input_variables=["source_lang", "target_lang", "sentence"],
        partial_variables={"format_instructions": output_parser.get_format_instructions()},
    )

    def extract_translation(parsed: dict) -> str:
        """Pull the translated text out of the parsed JSON answer."""
        return parsed["translated"]

    return prompt | model | output_parser | RunnableLambda(extract_translation)
39
+
40
+
41
def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lang, where_to_place, keep_original, chosen_model, api_key = None, progress=gr.Progress()):
    """Translate columns of one Excel sheet and write the result to a workbook.

    Every selected column gets a sibling ``<column>_translated`` column built
    by translating the column's distinct non-null values in one batch and
    mapping them back onto the rows. Columns that are not selected are copied
    unchanged into their ``<column>_translated`` sibling so that every layout
    option can address both names.

    Args:
        file: Uploaded file object; its ``.name`` attribute is read as the
            path of the workbook.
        sheet_name: Name of the sheet to read.
        col_name: Columns to translate. ``None`` or an empty selection means
            "every column whose dtype is ``object``".
        source_lang: Source language passed to the prompt.
        target_lang: Target language passed to the prompt.
        where_to_place: One of ``'replace'``,
            ``'append_all (ต่อ column สุดท้าย)'``,
            ``'append_compare (เปรียบเทียบ column by column)'`` or
            ``'new_sheet'``. ``None`` selects the append-all layout.
        keep_original: ``'keep original'`` outputs all sheet columns; any
            other value restricts the output to the translated selection.
        chosen_model: Model label forwarded to ``using_model``.
        api_key: API key forwarded to ``using_model``.
        progress: Gradio progress tracker.

    Returns:
        The path of the written workbook (``"translated_output.xlsx"``).

    Raises:
        gr.Error: If no file or sheet was provided, or if writing the
            workbook fails (including an unknown ``where_to_place``).
    """
    append_all = 'append_all (ต่อ column สุดท้าย)'
    append_compare = 'append_compare (เปรียบเทียบ column by column)'

    if file is None:
        raise gr.Error("Please upload an Excel file.")
    if sheet_name is None:
        # read_excel(sheet_name=None) would return a dict of all sheets.
        raise gr.Error("Please select a sheet.")

    # The default must be the exact UI label; a bare 'append_all' matches no
    # layout branch below and nothing would be written.
    if where_to_place is None:
        where_to_place = append_all

    model = using_model(chosen_model = chosen_model, api_key = api_key)

    df = pd.read_excel(file.name, sheet_name=sheet_name, header=0)
    original_col = list(df.columns)
    total_columns = len(original_col)
    current_step = 0

    progress(0, desc="Starting translation process...")

    # A multiselect with nothing picked may arrive as an empty list, so treat
    # "empty" like "not given" and fall back to all text (object) columns.
    if not col_name:
        col_name = [col for col in original_col if df[col].dtype == 'object']

    # Columns that are not selected for translation.
    all_col = [col for col in original_col if col not in col_name]

    print(col_name)

    # Translate the selected columns.
    for col in col_name:
        current_step += 1
        progress(current_step / total_columns, desc=f"Translating {col} ({current_step}/{total_columns})...")

        # f-string rather than `+` so non-string headers (e.g. ints) work too.
        translated_name = f"{col}_translated"
        try:
            # Translate each distinct value once, then map back onto rows.
            unique_values = list(df[col].dropna().unique())
            requests = [
                {"sentence": value, "source_lang": source_lang, "target_lang": target_lang}
                for value in unique_values
            ]
            answers = model.batch(requests, config={"max_concurrency": 3})
            translations = dict(zip(unique_values, answers))

            # Values without a translation (e.g. NaN) keep the original cell.
            df[translated_name] = df[col].map(translations).fillna(df[col])
        except Exception as e:
            print(f"Error in column {col}: {e}")
            # Best effort: keep the untranslated text so the layout step
            # below still finds the column instead of failing with KeyError.
            if col in df.columns:
                df[translated_name] = df[col]

    # Remaining columns are carried over untranslated.
    for column in all_col:
        current_step += 1
        progress(current_step / total_columns, desc=f"Translating column name: {column} ({current_step}/{total_columns})...")
        df[f"{column}_translated"] = df[column]

    output_file = "translated_output.xlsx"

    if keep_original == 'keep original':
        output_col = original_col
    else:
        output_col = list(col_name)
    translated_cols = [f"{col}_translated" for col in output_col]

    try:
        if where_to_place == append_all:
            # Originals first, all translations after them.
            df[output_col + translated_cols].to_excel(output_file, index=False)
        elif where_to_place == append_compare:
            # Each original immediately followed by its translation.
            interleaved = [name for pair in zip(output_col, translated_cols) for name in pair]
            df[interleaved].to_excel(output_file, index=False)
        elif where_to_place == 'replace':
            df[translated_cols].to_excel(output_file, index=False)
        elif where_to_place == 'new_sheet':
            # Excel sheet names are limited to 31 characters; trimming the
            # base to 20 keeps "<base>_translated" within that limit.
            translated_sheet = f'{str(sheet_name)[:20]}_translated'
            with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
                df[output_col].to_excel(writer, sheet_name=sheet_name, index=False)
                df[translated_cols].to_excel(writer, sheet_name=translated_sheet, index=False)
        else:
            raise ValueError(f"Unknown placement option: {where_to_place!r}")

        progress(1.0, desc="Saving translated file... Completed!")
    except Exception as e:
        print(f"Error saving the file: {e}")
        raise gr.Error(f"Error saving the file: {e}") from e

    progress(1.0, desc="Completed all tasks!")
    return output_file
149
+
150
+
151
if __name__ == "__main__":
    # Build the Gradio UI and serve it on port 7860 on all interfaces.
    with gr.Blocks() as iface:
        gr.Markdown("## Excel Translation Interface")

        excel_file = gr.File(label="Upload Excel File")
        sheet_name = gr.Dropdown(label="Select Sheet", interactive=True)
        column_name = gr.Dropdown(
            label="Select Column to Translate (Not require)",
            multiselect=True,
            interactive=True,
        )

        with gr.Row():
            source_language = gr.Textbox(label="Source Language Code")
            target_language = gr.Textbox(label="Target Language Code")
        # NOTE(review): the original indentation was lost; both dropdowns are
        # assumed to share this row -- confirm against the intended layout.
        with gr.Row():
            where_to_place = gr.Dropdown(
                multiselect=False,
                label="How translated columns should be placed",
                choices=[
                    'replace',
                    'append_all (ต่อ column สุดท้าย)',
                    'append_compare (เปรียบเทียบ column by column)',
                    'new_sheet',
                ],
                interactive=True,
            )
            keep_original = gr.Dropdown(
                multiselect=False,
                label="You want to keep original column or just only the translated column",
                choices=['keep original', 'translated_column'],
                interactive=True,
            )

        def get_sheet_names(file):
            """Return the sheet names of the uploaded workbook."""
            # Context manager so the workbook handle is closed after reading.
            with pd.ExcelFile(file.name) as xls:
                return xls.sheet_names

        def get_column_names(file, sheet_name):
            """Return the header names of one sheet of the uploaded workbook."""
            dd = pd.read_excel(file.name, sheet_name=sheet_name)
            return list(dd.columns)

        def update_sheets(file):
            """Refresh the sheet dropdown when the uploaded file changes."""
            # The change event also fires when the upload is cleared.
            if file is None:
                return gr.update(choices=[])
            return gr.update(choices=get_sheet_names(file))

        def update_columns(file, sheet_name):
            """Refresh the column dropdown when the selected sheet changes."""
            if file is None or sheet_name is None:
                return gr.update(choices=[])
            return gr.update(choices=get_column_names(file, sheet_name))

        excel_file.change(fn=update_sheets, inputs=excel_file, outputs=sheet_name)
        sheet_name.change(fn=update_columns, inputs=[excel_file, sheet_name], outputs=column_name)

        model_choosing = gr.Dropdown(
            multiselect=False,
            label="Choosing Model you want",
            choices=['ChatGPT (4o-mini)', 'another (In Progress)'],
            interactive=True,
        )

        needed_require = gr.Textbox(label="API Key(require if Chatgpt)")
        translate_button = gr.Button("Translate")
        output_file = gr.File(label="Download Translated Excel File", interactive=True)

        # Unified translation function
        def translate_excel(
            file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key,
            progress=gr.Progress(),
        ):
            """Dispatch the button click to the implementation for the model."""
            # `progress` is declared on the registered handler, which is the
            # documented way to get a tracker, and handed down explicitly.
            if model == "ChatGPT (4o-mini)":
                # Call ChatGPT-based translation
                return chat_gpt_translate_excel(
                    file, sheet_name, columns, source_lang, target_lang, place_option, keep_opt, model, api_key,
                    progress=progress,
                )
            # Handle other models (currently in progress)
            raise gr.Error("Translation with the selected model is not yet implemented.")

        # Register button click
        translate_button.click(
            fn=translate_excel,
            inputs=[
                excel_file,
                sheet_name,
                column_name,
                source_language,
                target_language,
                where_to_place,
                keep_original,
                model_choosing,
                needed_require,
            ],
            outputs=output_file,
        )
    iface.launch(debug=True, server_port= 7860, server_name="0.0.0.0")
236
+