Spaces:
Sleeping
Sleeping
Petch DS committed on
Commit ·
0a3b2e2
1
Parent(s): 74a4b1d
fix output format
Browse files
- requirements.txt +1 -1
- translator_app.ipynb +132 -37
- translator_app.py +35 -22
requirements.txt
CHANGED
|
@@ -4,4 +4,4 @@ langchain-openai
|
|
| 4 |
xlsxwriter==3.2.0
|
| 5 |
pandas==2.0.3
|
| 6 |
numpy==1.24.3
|
| 7 |
-
openpyxl==3.1.5
|
|
|
|
| 4 |
xlsxwriter==3.2.0
|
| 5 |
pandas==2.0.3
|
| 6 |
numpy==1.24.3
|
| 7 |
+
openpyxl==3.1.5
|
translator_app.ipynb
CHANGED
|
@@ -20,7 +20,7 @@
|
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"cell_type": "code",
|
| 23 |
-
"execution_count":
|
| 24 |
"metadata": {},
|
| 25 |
"outputs": [],
|
| 26 |
"source": [
|
|
@@ -30,9 +30,77 @@
|
|
| 30 |
"from langchain_core.prompts import PromptTemplate\n",
|
| 31 |
"from langchain_core.runnables import RunnableLambda\n",
|
| 32 |
"import gradio as gr\n",
|
| 33 |
-
"import pandas as pd"
|
|
|
|
|
|
|
| 34 |
]
|
| 35 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
{
|
| 37 |
"cell_type": "markdown",
|
| 38 |
"metadata": {},
|
|
@@ -42,7 +110,7 @@
|
|
| 42 |
},
|
| 43 |
{
|
| 44 |
"cell_type": "code",
|
| 45 |
-
"execution_count":
|
| 46 |
"metadata": {},
|
| 47 |
"outputs": [],
|
| 48 |
"source": [
|
|
@@ -53,7 +121,7 @@
|
|
| 53 |
"cell_type": "markdown",
|
| 54 |
"metadata": {},
|
| 55 |
"source": [
|
| 56 |
-
"# Process"
|
| 57 |
]
|
| 58 |
},
|
| 59 |
{
|
|
@@ -65,16 +133,10 @@
|
|
| 65 |
},
|
| 66 |
{
|
| 67 |
"cell_type": "code",
|
| 68 |
-
"execution_count":
|
| 69 |
"metadata": {},
|
| 70 |
"outputs": [],
|
| 71 |
"source": [
|
| 72 |
-
"def using_model(chosen_model, api_key):\n",
|
| 73 |
-
" if chosen_model == 'ChatGPT (4o-mini)':\n",
|
| 74 |
-
" model = chat_gpt_4o_mini(api_key = api_key)\n",
|
| 75 |
-
" else:\n",
|
| 76 |
-
" pass\n",
|
| 77 |
-
" return model\n",
|
| 78 |
"\n",
|
| 79 |
"def chat_gpt_4o_mini(api_key = None):\n",
|
| 80 |
" model = ChatOpenAI(model_name=\"gpt-4o-mini\", api_key=api_key)\n",
|
|
@@ -96,7 +158,22 @@
|
|
| 96 |
"\n",
|
| 97 |
" chain = prompt | model | output_parser | RunnableLambda(get_class) \n",
|
| 98 |
"\n",
|
| 99 |
-
" return chain"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
]
|
| 101 |
},
|
| 102 |
{
|
|
@@ -108,7 +185,7 @@
|
|
| 108 |
},
|
| 109 |
{
|
| 110 |
"cell_type": "code",
|
| 111 |
-
"execution_count":
|
| 112 |
"metadata": {
|
| 113 |
"colab": {
|
| 114 |
"base_uri": "https://localhost:8080/",
|
|
@@ -122,11 +199,12 @@
|
|
| 122 |
"\n",
|
| 123 |
"def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lang, where_to_place, keep_original, chosen_model, api_key = None, progress=gr.Progress()):\n",
|
| 124 |
" if where_to_place is None:\n",
|
| 125 |
-
" where_to_place = 'append_all'\n",
|
| 126 |
"\n",
|
| 127 |
" model = using_model(chosen_model = chosen_model, api_key = api_key)\n",
|
| 128 |
"\n",
|
| 129 |
" df = pd.read_excel(file.name, sheet_name=sheet_name, header=0)\n",
|
|
|
|
| 130 |
" original_col = df.columns\n",
|
| 131 |
" total_columns = len(df.columns)\n",
|
| 132 |
" current_step = 0\n",
|
|
@@ -134,20 +212,23 @@
|
|
| 134 |
" progress(0, desc=\"Starting translation process...\")\n",
|
| 135 |
"\n",
|
| 136 |
" # Automatically detect string columns if col_name is None\n",
|
|
|
|
| 137 |
" if col_name is None:\n",
|
| 138 |
" col_name = [col for col in df.columns if df[col].dtype == 'object']\n",
|
| 139 |
"\n",
|
| 140 |
" # Determine columns that are not selected for translation\n",
|
| 141 |
-
"
|
|
|
|
| 142 |
"\n",
|
| 143 |
" # Dictionary to store unique values and their translations\n",
|
| 144 |
" translation_map = {}\n",
|
|
|
|
| 145 |
" print(col_name)\n",
|
| 146 |
"\n",
|
| 147 |
" # Process the selected columns for translation\n",
|
| 148 |
" for idx, col in enumerate(col_name):\n",
|
| 149 |
" current_step += 1\n",
|
| 150 |
-
" progress(current_step / total_columns, desc=f\"Translating {col} ({current_step}/{
|
| 151 |
"\n",
|
| 152 |
" try:\n",
|
| 153 |
" # Extract unique values from the column\n",
|
|
@@ -164,6 +245,7 @@
|
|
| 164 |
" translations = dict(zip(unique_values, answers))\n",
|
| 165 |
" translation_map[col] = translations\n",
|
| 166 |
"\n",
|
|
|
|
| 167 |
" # Map translations back to the original DataFrame\n",
|
| 168 |
" df[col + \"_translated\"] = df[col].map(translations).fillna(df[col])\n",
|
| 169 |
"\n",
|
|
@@ -171,50 +253,63 @@
|
|
| 171 |
" print(f\"Error in column {col}: {e}\")\n",
|
| 172 |
" continue\n",
|
| 173 |
"\n",
|
| 174 |
-
" # Process remaining columns\n",
|
| 175 |
-
" for column in
|
| 176 |
-
"
|
| 177 |
-
"
|
| 178 |
"\n",
|
| 179 |
-
"
|
| 180 |
-
"
|
| 181 |
-
"
|
| 182 |
-
"
|
| 183 |
-
"
|
| 184 |
"\n",
|
| 185 |
-
"
|
| 186 |
-
"
|
| 187 |
-
"
|
| 188 |
"\n",
|
| 189 |
" \n",
|
| 190 |
-
" output_file = \"
|
| 191 |
" if not os.path.exists(output_file):\n",
|
| 192 |
" pd.DataFrame().to_excel(output_file, index=False)\n",
|
| 193 |
"\n",
|
| 194 |
" if keep_original == 'keep original':\n",
|
|
|
|
| 195 |
" output_col = original_col\n",
|
| 196 |
" else:\n",
|
|
|
|
| 197 |
" output_col = col_name\n",
|
| 198 |
"\n",
|
| 199 |
" \n",
|
| 200 |
" try:\n",
|
| 201 |
" if where_to_place == 'append_all (ต่อ column สุดท้าย)':\n",
|
| 202 |
-
" final_cols = list(output_col) + [col
|
| 203 |
" result = df[final_cols]\n",
|
| 204 |
" result.to_excel(output_file, index=False)\n",
|
| 205 |
" elif where_to_place == 'append_compare (เปรียบเทียบ column by column)':\n",
|
| 206 |
" final_cols = []\n",
|
| 207 |
" for col in output_col:\n",
|
| 208 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
" result = df[final_cols]\n",
|
| 210 |
" result.to_excel(output_file, index=False)\n",
|
| 211 |
" elif where_to_place == 'replace':\n",
|
| 212 |
-
" final_cols = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
" result = df[final_cols]\n",
|
| 214 |
" result.to_excel(output_file, index=False)\n",
|
|
|
|
| 215 |
" elif where_to_place == 'new_sheet':\n",
|
| 216 |
" final_cols = [col for col in output_col]\n",
|
| 217 |
-
" new_tab_cols =
|
| 218 |
"\n",
|
| 219 |
" result = df[final_cols]\n",
|
| 220 |
" result1 = df[new_tab_cols]\n",
|
|
@@ -242,7 +337,7 @@
|
|
| 242 |
},
|
| 243 |
{
|
| 244 |
"cell_type": "code",
|
| 245 |
-
"execution_count":
|
| 246 |
"metadata": {
|
| 247 |
"id": "x8Njoc4fROSp"
|
| 248 |
},
|
|
@@ -281,7 +376,7 @@
|
|
| 281 |
"name": "stdout",
|
| 282 |
"output_type": "stream",
|
| 283 |
"text": [
|
| 284 |
-
"['
|
| 285 |
"Keyboard interruption in main thread... closing server.\n"
|
| 286 |
]
|
| 287 |
},
|
|
@@ -289,7 +384,7 @@
|
|
| 289 |
"data": {
|
| 290 |
"text/plain": []
|
| 291 |
},
|
| 292 |
-
"execution_count":
|
| 293 |
"metadata": {},
|
| 294 |
"output_type": "execute_result"
|
| 295 |
}
|
|
@@ -341,7 +436,7 @@
|
|
| 341 |
"\n",
|
| 342 |
" model_choosing = gr.Dropdown(multiselect = False , \n",
|
| 343 |
" label = \"Choosing Model you want\", \n",
|
| 344 |
-
" choices = ['ChatGPT (4o-mini)', 'another (In Progress)']\n",
|
| 345 |
" , interactive=True\n",
|
| 346 |
" )\n",
|
| 347 |
"\n",
|
|
|
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"cell_type": "code",
|
| 23 |
+
"execution_count": 21,
|
| 24 |
"metadata": {},
|
| 25 |
"outputs": [],
|
| 26 |
"source": [
|
|
|
|
| 30 |
"from langchain_core.prompts import PromptTemplate\n",
|
| 31 |
"from langchain_core.runnables import RunnableLambda\n",
|
| 32 |
"import gradio as gr\n",
|
| 33 |
+
"import pandas as pd\n",
|
| 34 |
+
"from transformers import T5Tokenizer, T5ForConditionalGeneration\n",
|
| 35 |
+
"import torch\n"
|
| 36 |
]
|
| 37 |
},
|
| 38 |
+
{
|
| 39 |
+
"cell_type": "code",
|
| 40 |
+
"execution_count": 22,
|
| 41 |
+
"metadata": {},
|
| 42 |
+
"outputs": [],
|
| 43 |
+
"source": [
|
| 44 |
+
"# from docx import Document\n",
|
| 45 |
+
"\n",
|
| 46 |
+
"# # โหลดไฟล์ Word\n",
|
| 47 |
+
"# doc = Document('test_file.docx')\n",
|
| 48 |
+
"\n",
|
| 49 |
+
"# # อ่านทุก paragraph และแสดงเนื้อหา\n",
|
| 50 |
+
"# for para in doc.paragraphs:\n",
|
| 51 |
+
"# print(para.text)"
|
| 52 |
+
]
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"cell_type": "code",
|
| 56 |
+
"execution_count": 23,
|
| 57 |
+
"metadata": {},
|
| 58 |
+
"outputs": [],
|
| 59 |
+
"source": [
|
| 60 |
+
"# # อ่านทุกตารางในเอกสาร\n",
|
| 61 |
+
"# for table in doc.tables:\n",
|
| 62 |
+
"# for row in table.rows:\n",
|
| 63 |
+
"# for cell in row.cells:\n",
|
| 64 |
+
"# print(cell.text)"
|
| 65 |
+
]
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"cell_type": "code",
|
| 69 |
+
"execution_count": 24,
|
| 70 |
+
"metadata": {},
|
| 71 |
+
"outputs": [],
|
| 72 |
+
"source": [
|
| 73 |
+
"# for element in doc.element.body:\n",
|
| 74 |
+
"# if element.tag.endswith('tbl'):\n",
|
| 75 |
+
"# # ถ้าเป็นตาราง\n",
|
| 76 |
+
"# print('Table found')\n",
|
| 77 |
+
"# elif element.tag.endswith('p'):\n",
|
| 78 |
+
"# # ถ้าเป็นพารากราฟ\n",
|
| 79 |
+
"# print('Paragraph found')"
|
| 80 |
+
]
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"cell_type": "code",
|
| 84 |
+
"execution_count": 25,
|
| 85 |
+
"metadata": {},
|
| 86 |
+
"outputs": [],
|
| 87 |
+
"source": [
|
| 88 |
+
"# # นับรูปภาพ\n",
|
| 89 |
+
"# images = doc.inline_shapes\n",
|
| 90 |
+
"# print(\"Found\", len(images), \"images\")\n",
|
| 91 |
+
"\n",
|
| 92 |
+
"# # ตัวอย่างวิธีดึงข้อมูลพื้นฐานของรูปภาพแต่ละรูป\n",
|
| 93 |
+
"# for image in images:\n",
|
| 94 |
+
"# print(\"Image size:\", image.width.pt, \"x\", image.height.pt) "
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
{
|
| 98 |
+
"cell_type": "code",
|
| 99 |
+
"execution_count": null,
|
| 100 |
+
"metadata": {},
|
| 101 |
+
"outputs": [],
|
| 102 |
+
"source": []
|
| 103 |
+
},
|
| 104 |
{
|
| 105 |
"cell_type": "markdown",
|
| 106 |
"metadata": {},
|
|
|
|
| 110 |
},
|
| 111 |
{
|
| 112 |
"cell_type": "code",
|
| 113 |
+
"execution_count": 26,
|
| 114 |
"metadata": {},
|
| 115 |
"outputs": [],
|
| 116 |
"source": [
|
|
|
|
| 121 |
"cell_type": "markdown",
|
| 122 |
"metadata": {},
|
| 123 |
"source": [
|
| 124 |
+
"# Process for Each Model"
|
| 125 |
]
|
| 126 |
},
|
| 127 |
{
|
|
|
|
| 133 |
},
|
| 134 |
{
|
| 135 |
"cell_type": "code",
|
| 136 |
+
"execution_count": 27,
|
| 137 |
"metadata": {},
|
| 138 |
"outputs": [],
|
| 139 |
"source": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
"\n",
|
| 141 |
"def chat_gpt_4o_mini(api_key = None):\n",
|
| 142 |
" model = ChatOpenAI(model_name=\"gpt-4o-mini\", api_key=api_key)\n",
|
|
|
|
| 158 |
"\n",
|
| 159 |
" chain = prompt | model | output_parser | RunnableLambda(get_class) \n",
|
| 160 |
"\n",
|
| 161 |
+
" return chain\n"
|
| 162 |
+
]
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"cell_type": "code",
|
| 166 |
+
"execution_count": 28,
|
| 167 |
+
"metadata": {},
|
| 168 |
+
"outputs": [],
|
| 169 |
+
"source": [
|
| 170 |
+
"\n",
|
| 171 |
+
"\n",
|
| 172 |
+
"def using_model(chosen_model, api_key=None):\n",
|
| 173 |
+
" if chosen_model == 'ChatGPT (4o-mini)':\n",
|
| 174 |
+
" return chat_gpt_4o_mini(api_key=api_key)\n",
|
| 175 |
+
" else:\n",
|
| 176 |
+
" raise ValueError(\"Unsupported model selected\")\n"
|
| 177 |
]
|
| 178 |
},
|
| 179 |
{
|
|
|
|
| 185 |
},
|
| 186 |
{
|
| 187 |
"cell_type": "code",
|
| 188 |
+
"execution_count": 35,
|
| 189 |
"metadata": {
|
| 190 |
"colab": {
|
| 191 |
"base_uri": "https://localhost:8080/",
|
|
|
|
| 199 |
"\n",
|
| 200 |
"def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lang, where_to_place, keep_original, chosen_model, api_key = None, progress=gr.Progress()):\n",
|
| 201 |
" if where_to_place is None:\n",
|
| 202 |
+
" where_to_place = 'append_all (ต่อ column สุดท้าย)'\n",
|
| 203 |
"\n",
|
| 204 |
" model = using_model(chosen_model = chosen_model, api_key = api_key)\n",
|
| 205 |
"\n",
|
| 206 |
" df = pd.read_excel(file.name, sheet_name=sheet_name, header=0)\n",
|
| 207 |
+
" # original columns = col_name + remain_col\n",
|
| 208 |
" original_col = df.columns\n",
|
| 209 |
" total_columns = len(df.columns)\n",
|
| 210 |
" current_step = 0\n",
|
|
|
|
| 212 |
" progress(0, desc=\"Starting translation process...\")\n",
|
| 213 |
"\n",
|
| 214 |
" # Automatically detect string columns if col_name is None\n",
|
| 215 |
+
" # col_name is column we want to translate\n",
|
| 216 |
" if col_name is None:\n",
|
| 217 |
" col_name = [col for col in df.columns if df[col].dtype == 'object']\n",
|
| 218 |
"\n",
|
| 219 |
" # Determine columns that are not selected for translation\n",
|
| 220 |
+
" # remain_col is column we do not want to translate\n",
|
| 221 |
+
" remain_col = [col for col in df.columns if col not in col_name]\n",
|
| 222 |
"\n",
|
| 223 |
" # Dictionary to store unique values and their translations\n",
|
| 224 |
" translation_map = {}\n",
|
| 225 |
+
" trans_col_name = []\n",
|
| 226 |
" print(col_name)\n",
|
| 227 |
"\n",
|
| 228 |
" # Process the selected columns for translation\n",
|
| 229 |
" for idx, col in enumerate(col_name):\n",
|
| 230 |
" current_step += 1\n",
|
| 231 |
+
" progress(current_step / total_columns, desc=f\"Translating {col} ({current_step}/{len(col_name)})...\")\n",
|
| 232 |
"\n",
|
| 233 |
" try:\n",
|
| 234 |
" # Extract unique values from the column\n",
|
|
|
|
| 245 |
" translations = dict(zip(unique_values, answers))\n",
|
| 246 |
" translation_map[col] = translations\n",
|
| 247 |
"\n",
|
| 248 |
+
" trans_col_name.append(col + \"_translated\")\n",
|
| 249 |
" # Map translations back to the original DataFrame\n",
|
| 250 |
" df[col + \"_translated\"] = df[col].map(translations).fillna(df[col])\n",
|
| 251 |
"\n",
|
|
|
|
| 253 |
" print(f\"Error in column {col}: {e}\")\n",
|
| 254 |
" continue\n",
|
| 255 |
"\n",
|
| 256 |
+
" # # Process remaining columns\n",
|
| 257 |
+
" # for column in remain_col:\n",
|
| 258 |
+
" # current_step += 1\n",
|
| 259 |
+
" # progress(current_step / total_columns, desc=f\"Translating column name: {column} ({current_step}/{len(remain_col)})...\")\n",
|
| 260 |
"\n",
|
| 261 |
+
" # try:\n",
|
| 262 |
+
" # # We do not translate remain_col which remaining col\n",
|
| 263 |
+
" # # remain_col = chain.batch([{\"sentence\": column, \"source_lang\": source_lang, \"target_lang\": target_lang}])\n",
|
| 264 |
+
" # name_col = column + '_translated' # Assuming the translation returns a list of translations\n",
|
| 265 |
+
" # df.loc[:, name_col] = df.loc[:, column]\n",
|
| 266 |
"\n",
|
| 267 |
+
" # except Exception as e:\n",
|
| 268 |
+
" # print(f\"Error in column {column}: {e}\")\n",
|
| 269 |
+
" # continue\n",
|
| 270 |
"\n",
|
| 271 |
" \n",
|
| 272 |
+
" output_file = f\"{file.name.split('.')[0]}_translated.xlsx\"\n",
|
| 273 |
" if not os.path.exists(output_file):\n",
|
| 274 |
" pd.DataFrame().to_excel(output_file, index=False)\n",
|
| 275 |
"\n",
|
| 276 |
" if keep_original == 'keep original':\n",
|
| 277 |
+
" # have the all columns\n",
|
| 278 |
" output_col = original_col\n",
|
| 279 |
" else:\n",
|
| 280 |
+
" # only translated column\n",
|
| 281 |
" output_col = col_name\n",
|
| 282 |
"\n",
|
| 283 |
" \n",
|
| 284 |
" try:\n",
|
| 285 |
" if where_to_place == 'append_all (ต่อ column สุดท้าย)':\n",
|
| 286 |
+
" final_cols = list(output_col) + [col for col in trans_col_name]\n",
|
| 287 |
" result = df[final_cols]\n",
|
| 288 |
" result.to_excel(output_file, index=False)\n",
|
| 289 |
" elif where_to_place == 'append_compare (เปรียบเทียบ column by column)':\n",
|
| 290 |
" final_cols = []\n",
|
| 291 |
" for col in output_col:\n",
|
| 292 |
+
" for trans_col in trans_col_name:\n",
|
| 293 |
+
" if col + '_translated' == trans_col:\n",
|
| 294 |
+
" final_cols = final_cols + [col, trans_col]\n",
|
| 295 |
+
" else:\n",
|
| 296 |
+
" final_cols = final_cols + [col]\n",
|
| 297 |
" result = df[final_cols]\n",
|
| 298 |
" result.to_excel(output_file, index=False)\n",
|
| 299 |
" elif where_to_place == 'replace':\n",
|
| 300 |
+
" final_cols = []\n",
|
| 301 |
+
" for col in output_col:\n",
|
| 302 |
+
" for trans_col in trans_col_name:\n",
|
| 303 |
+
" if col + '_translated' == trans_col:\n",
|
| 304 |
+
" final_cols = final_cols + [trans_col]\n",
|
| 305 |
+
" else:\n",
|
| 306 |
+
" final_cols = final_cols + [col]\n",
|
| 307 |
" result = df[final_cols]\n",
|
| 308 |
" result.to_excel(output_file, index=False)\n",
|
| 309 |
+
"\n",
|
| 310 |
" elif where_to_place == 'new_sheet':\n",
|
| 311 |
" final_cols = [col for col in output_col]\n",
|
| 312 |
+
" new_tab_cols = trans_col_name\n",
|
| 313 |
"\n",
|
| 314 |
" result = df[final_cols]\n",
|
| 315 |
" result1 = df[new_tab_cols]\n",
|
|
|
|
| 337 |
},
|
| 338 |
{
|
| 339 |
"cell_type": "code",
|
| 340 |
+
"execution_count": 36,
|
| 341 |
"metadata": {
|
| 342 |
"id": "x8Njoc4fROSp"
|
| 343 |
},
|
|
|
|
| 376 |
"name": "stdout",
|
| 377 |
"output_type": "stream",
|
| 378 |
"text": [
|
| 379 |
+
"['control type']\n",
|
| 380 |
"Keyboard interruption in main thread... closing server.\n"
|
| 381 |
]
|
| 382 |
},
|
|
|
|
| 384 |
"data": {
|
| 385 |
"text/plain": []
|
| 386 |
},
|
| 387 |
+
"execution_count": 36,
|
| 388 |
"metadata": {},
|
| 389 |
"output_type": "execute_result"
|
| 390 |
}
|
|
|
|
| 436 |
"\n",
|
| 437 |
" model_choosing = gr.Dropdown(multiselect = False , \n",
|
| 438 |
" label = \"Choosing Model you want\", \n",
|
| 439 |
+
" choices = ['ChatGPT (4o-mini)', 'DeepSeek (developing...)','another (In Progress)']\n",
|
| 440 |
" , interactive=True\n",
|
| 441 |
" )\n",
|
| 442 |
"\n",
|
translator_app.py
CHANGED
|
@@ -56,11 +56,11 @@ def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lan
|
|
| 56 |
col_name = [col for col in df.columns if df[col].dtype == 'object']
|
| 57 |
|
| 58 |
# Determine columns that are not selected for translation
|
| 59 |
-
|
| 60 |
|
| 61 |
# Dictionary to store unique values and their translations
|
| 62 |
translation_map = {}
|
| 63 |
-
|
| 64 |
|
| 65 |
# Process the selected columns for translation
|
| 66 |
for idx, col in enumerate(col_name):
|
|
@@ -82,6 +82,7 @@ def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lan
|
|
| 82 |
translations = dict(zip(unique_values, answers))
|
| 83 |
translation_map[col] = translations
|
| 84 |
|
|
|
|
| 85 |
# Map translations back to the original DataFrame
|
| 86 |
df[col + "_translated"] = df[col].map(translations).fillna(df[col])
|
| 87 |
|
|
@@ -89,23 +90,23 @@ def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lan
|
|
| 89 |
print(f"Error in column {col}: {e}")
|
| 90 |
continue
|
| 91 |
|
| 92 |
-
# Process remaining columns
|
| 93 |
-
for column in
|
| 94 |
-
|
| 95 |
-
|
| 96 |
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
|
| 107 |
|
| 108 |
-
output_file = "
|
| 109 |
if not os.path.exists(output_file):
|
| 110 |
pd.DataFrame().to_excel(output_file, index=False)
|
| 111 |
|
|
@@ -115,22 +116,33 @@ def chat_gpt_translate_excel(file, sheet_name, col_name, source_lang, target_lan
|
|
| 115 |
output_col = col_name
|
| 116 |
try:
|
| 117 |
if where_to_place == 'append_all (ต่อ column สุดท้าย)':
|
| 118 |
-
final_cols = list(output_col) + [col
|
| 119 |
result = df[final_cols]
|
| 120 |
result.to_excel(output_file, index=False)
|
| 121 |
elif where_to_place == 'append_compare (เปรียบเทียบ column by column)':
|
| 122 |
final_cols = []
|
| 123 |
for col in output_col:
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
result = df[final_cols]
|
| 126 |
result.to_excel(output_file, index=False)
|
| 127 |
elif where_to_place == 'replace':
|
| 128 |
-
final_cols = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
result = df[final_cols]
|
| 130 |
result.to_excel(output_file, index=False)
|
|
|
|
| 131 |
elif where_to_place == 'new_sheet':
|
| 132 |
final_cols = [col for col in output_col]
|
| 133 |
-
new_tab_cols =
|
| 134 |
|
| 135 |
result = df[final_cols]
|
| 136 |
result1 = df[new_tab_cols]
|
|
@@ -195,7 +207,7 @@ if __name__ == "__main__" :
|
|
| 195 |
|
| 196 |
model_choosing = gr.Dropdown(multiselect = False ,
|
| 197 |
label = "Choosing Model you want",
|
| 198 |
-
choices = ['ChatGPT (4o-mini)', 'another (In Progress)']
|
| 199 |
, interactive=True
|
| 200 |
)
|
| 201 |
|
|
@@ -232,5 +244,6 @@ if __name__ == "__main__" :
|
|
| 232 |
],
|
| 233 |
outputs=output_file,
|
| 234 |
)
|
| 235 |
-
iface.launch(debug=True, share=True, server_port= 7860,
|
| 236 |
-
|
|
|
|
|
|
| 56 |
col_name = [col for col in df.columns if df[col].dtype == 'object']
|
| 57 |
|
| 58 |
# Determine columns that are not selected for translation
|
| 59 |
+
remain_col = [col for col in df.columns if col not in col_name]
|
| 60 |
|
| 61 |
# Dictionary to store unique values and their translations
|
| 62 |
translation_map = {}
|
| 63 |
+
trans_col_name = []
|
| 64 |
|
| 65 |
# Process the selected columns for translation
|
| 66 |
for idx, col in enumerate(col_name):
|
|
|
|
| 82 |
translations = dict(zip(unique_values, answers))
|
| 83 |
translation_map[col] = translations
|
| 84 |
|
| 85 |
+
trans_col_name.append(col + "_translated")
|
| 86 |
# Map translations back to the original DataFrame
|
| 87 |
df[col + "_translated"] = df[col].map(translations).fillna(df[col])
|
| 88 |
|
|
|
|
| 90 |
print(f"Error in column {col}: {e}")
|
| 91 |
continue
|
| 92 |
|
| 93 |
+
# # Process remaining columns
|
| 94 |
+
# for column in remain_col:
|
| 95 |
+
# current_step += 1
|
| 96 |
+
# progress(current_step / total_columns, desc=f"Translating column name: {column} ({current_step}/{len(remain_col)})...")
|
| 97 |
|
| 98 |
+
# try:
|
| 99 |
+
# # We do not translate remain_col which remaining col
|
| 100 |
+
# # remain_col = chain.batch([{"sentence": column, "source_lang": source_lang, "target_lang": target_lang}])
|
| 101 |
+
# name_col = column + '_translated' # Assuming the translation returns a list of translations
|
| 102 |
+
# df.loc[:, name_col] = df.loc[:, column]
|
| 103 |
|
| 104 |
+
# except Exception as e:
|
| 105 |
+
# print(f"Error in column {column}: {e}")
|
| 106 |
+
# continue
|
| 107 |
|
| 108 |
|
| 109 |
+
output_file = f"{file.name}_translated.xlsx"
|
| 110 |
if not os.path.exists(output_file):
|
| 111 |
pd.DataFrame().to_excel(output_file, index=False)
|
| 112 |
|
|
|
|
| 116 |
output_col = col_name
|
| 117 |
try:
|
| 118 |
if where_to_place == 'append_all (ต่อ column สุดท้าย)':
|
| 119 |
+
final_cols = list(output_col) + [col for col in trans_col_name]
|
| 120 |
result = df[final_cols]
|
| 121 |
result.to_excel(output_file, index=False)
|
| 122 |
elif where_to_place == 'append_compare (เปรียบเทียบ column by column)':
|
| 123 |
final_cols = []
|
| 124 |
for col in output_col:
|
| 125 |
+
for trans_col in trans_col_name:
|
| 126 |
+
if col + '_translated' == trans_col:
|
| 127 |
+
final_cols = final_cols + [col, trans_col]
|
| 128 |
+
else:
|
| 129 |
+
final_cols = final_cols + [col]
|
| 130 |
result = df[final_cols]
|
| 131 |
result.to_excel(output_file, index=False)
|
| 132 |
elif where_to_place == 'replace':
|
| 133 |
+
final_cols = []
|
| 134 |
+
for col in output_col:
|
| 135 |
+
for trans_col in trans_col_name:
|
| 136 |
+
if col + '_translated' == trans_col:
|
| 137 |
+
final_cols = final_cols + [trans_col]
|
| 138 |
+
else:
|
| 139 |
+
final_cols = final_cols + [col]
|
| 140 |
result = df[final_cols]
|
| 141 |
result.to_excel(output_file, index=False)
|
| 142 |
+
|
| 143 |
elif where_to_place == 'new_sheet':
|
| 144 |
final_cols = [col for col in output_col]
|
| 145 |
+
new_tab_cols = trans_col_name
|
| 146 |
|
| 147 |
result = df[final_cols]
|
| 148 |
result1 = df[new_tab_cols]
|
|
|
|
| 207 |
|
| 208 |
model_choosing = gr.Dropdown(multiselect = False ,
|
| 209 |
label = "Choosing Model you want",
|
| 210 |
+
choices = ['ChatGPT (4o-mini)', 'DeepSeek (developing...)', 'another (In Progress)']
|
| 211 |
, interactive=True
|
| 212 |
)
|
| 213 |
|
|
|
|
| 244 |
],
|
| 245 |
outputs=output_file,
|
| 246 |
)
|
| 247 |
+
iface.launch(debug=True, share=True, server_port= 7860,
|
| 248 |
+
server_name="0.0.0.0"
|
| 249 |
+
)
|