Farhan1572 committed on
Commit
b1fc497
·
verified ·
1 Parent(s): 97781c4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +207 -0
app.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from openai import OpenAI
3
+ import gradio as gr
4
+
5
+ import os
6
+
7
# The OpenAI credential is read from the "api_key" environment variable;
# the lookup yields None when the variable is not set.
api_key = os.environ.get("api_key")

# Module-level client shared by every translation request in this file.
client = OpenAI(api_key=api_key)
11
+
12
def _is_cue_index(lines, position, inside_timed_cue):
    """Return True when the all-digit line at ``position`` opens a new cue."""
    # Outside a timed cue a bare number cannot be dialogue, so it is an index.
    if not inside_timed_cue:
        return True
    # Inside a timed cue a bare number is an index only when the next
    # non-blank line is a timestamp; otherwise it is dialogue such as "1999".
    for offset in range(position + 1, len(lines)):
        if lines[offset]:
            return '-->' in lines[offset]
    return False


def get_dataframe(text):
    """Parse SubRip-style subtitle text into a DataFrame with one row per cue.

    Each cue is expected to look like::

        <index line made of digits>
        <timestamp line containing "-->">
        <one or more text lines>

    Args:
        text: The raw subtitle text. Lines are split on "\\n" and stripped of
            surrounding whitespace; a leading byte-order mark is ignored.

    Returns:
        A pandas DataFrame with the string columns ``Number``, ``Timestamp``
        and ``Text``. ``Timestamp`` is None for a cue that has no timestamp
        line. ``Text`` holds every text line of the cue followed by "\\n",
        including the blank separator lines that follow the cue.
    """
    # A byte-order mark is not whitespace for str.strip(), so without this the
    # first index line would fail isdigit() and the first cue would be lost.
    lines = [raw.strip() for raw in text.lstrip("\ufeff").split("\n")]

    # One list per output column.
    numbers = []
    timestamps = []
    texts = []

    # State of the cue currently being collected.
    current_number = None
    current_timestamp = None
    current_text = ""

    for position, line in enumerate(lines):
        inside_timed_cue = current_number is not None and current_timestamp is not None

        if line.isdigit() and _is_cue_index(lines, position, inside_timed_cue):
            # A new cue starts: flush the previous one, if there is one.
            if current_number is not None:
                numbers.append(current_number)
                timestamps.append(current_timestamp)
                texts.append(current_text)

            current_number = line
            current_timestamp = None
            current_text = ""

        elif '-->' in line:
            current_timestamp = line

        else:
            # Everything else (blank lines included) belongs to the cue text.
            current_text += line + "\n"

    # Flush the last cue, if any cue was seen at all.
    if current_number is not None:
        numbers.append(current_number)
        timestamps.append(current_timestamp)
        texts.append(current_text)

    return pd.DataFrame({
        'Number': numbers,
        'Timestamp': timestamps,
        'Text': texts,
    })
68
+
69
+
70
+
71
def translate_text(source_language, target_language, TEXT, max_cpl, ideal_cpl):
    """Ask the chat model to translate one subtitle cue.

    Args:
        source_language: Language of ``TEXT``, interpolated into the prompt.
        target_language: Language to translate into, interpolated into the prompt.
        TEXT: The subtitle text of a single cue.
        max_cpl: Maximum characters per line, stated to the model.
        ideal_cpl: Ideal characters per line, stated to the model.

    Returns:
        The content of the first choice of the chat completion.
    """
    messages = [
        {"role": "system", "content": "You are a multilingual translator for movies subtitles."},
        {"role": "system", "content": "The number of input characters and output characters should be the same despite the change in language."},
        {"role": "system", "content": f"Ideal characters per line is {ideal_cpl} and maximum alloed charactr per line is {max_cpl}"},
        # This message used to be a plain string with an unfilled "{}"
        # placeholder, so the model was sent a literal "{}" instead of a limit.
        {"role": "system", "content": f"In response, maximum characters per line is {max_cpl}"},
        {"role": "system", "content": "Maximum two lines are allowed for the response"},
        {"role": "system", "content": "You MUST USE NEW LINE WHERE ALREADY USED IN THE GIVEN TEXT"},
        {"role": "system", "content": "YOU MUST KEEP ALL THE SEPARATORS IN THE RIGHT PLACE WHERE ALREADY PLACED IN THE ORIGINAL TEXT"},
        {"role": "system", "content": "You SHOULD NOT SKIP ANY LINE OR ANY INFORMATION"},
        {"role": "system", "content": "The Tranlation should be error proof"},
        {"role": "user", "content": f"""Translate the text from {source_language} language to {target_language} language.:
\nTEXT: {TEXT}
\nREMEMBER: MAXIMUM CHARACTERS PER LINE IN RESPONSE ARE {max_cpl}
\nREMEMBER: MAXIMUM LINES ALLOWED IN THE RESPONSE IS 02
So make the translation accordingly so it accomodates the limit
NOTE: THE OUTPUT SHOULD BE IN {target_language} language.
"""},
    ]

    response = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        temperature=0.1,
        messages=messages,
    )
    return response.choices[0].message.content
98
+
99
+
100
+
101
def translate_text_correct(source_language, target_language, TEXT, max_cpl, ideal_cpl):
    """Ask the chat model to shorten ``TEXT`` without changing its language.

    Args:
        source_language: Accepted but not used in the prompt.
        target_language: Accepted but not used in the prompt.
        TEXT: The line that has to be shortened.
        max_cpl: Character limit stated to the model.
        ideal_cpl: Accepted but not used in the prompt.

    Returns:
        The content of the first choice of the chat completion.
    """
    print("from the correction fucntion")

    # The user request is assembled first and then placed after the two system rules.
    shorten_request = f"""
DO NOT CHANGE THE LANGUAGE
Reduce the size of the text to less than {max_cpl} even if there is a change in meaning.
\nWrite the sentence in shortest possible manner

\nTEXT: {TEXT}


"""
    conversation = [
        {"role": "system", "content": "You reduce the size of the sentences."},
        {"role": "system", "content": f"The maximuim output sentecne should not be more than {max_cpl} characters."},
        {"role": "user", "content": shorten_request},
    ]

    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        temperature=0.1,
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
128
+
129
+
130
+
131
+
132
def check_conditions(response, source_language, target_language, text, max_cpl, ideal_cpl, max_lines=2, max_attempts=5):
    """Shorten every line of ``response`` that exceeds ``max_cpl`` characters.

    The response is split on "\\n" and scanned from the top. The first line
    longer than ``max_cpl`` is replaced by the result of
    ``translate_text_correct`` and the whole response is scanned again,
    because the replacement may itself contain line breaks.

    Args:
        response: The translated text to validate.
        source_language: Forwarded to ``translate_text_correct``.
        target_language: Forwarded to ``translate_text_correct``.
        text: The original cue text; accepted for compatibility, not used.
        max_cpl: Maximum characters allowed on one line. A line of exactly
            ``max_cpl`` characters is accepted.
        ideal_cpl: Forwarded to ``translate_text_correct``.
        max_lines: Accepted for compatibility; the line count is not enforced.
        max_attempts: Upper bound on the number of correction requests. The
            previous recursive form had no bound and never returned when the
            model kept answering with an over-long line.

    Returns:
        The response with over-long lines shortened. If ``max_attempts``
        corrections were not enough, the last attempt is returned as is.
    """
    for _ in range(max_attempts):
        lines = response.split("\n")

        # Find the first line that breaks the limit, logging each verdict.
        offender = None
        for index, line in enumerate(lines):
            if len(line) > max_cpl:
                print(line, "False")
                offender = index
                break
            print(line, "True")

        if offender is None:
            return response

        lines[offender] = translate_text_correct(
            source_language, target_language, lines[offender], max_cpl, ideal_cpl
        )
        response = "\n".join(lines)

    return response
146
+
147
+
148
def get_translation(text, source_language, target_language, max_cpl, ideal_cpl):
    """Translate every cue of the subtitle text and attach the results.

    The text is parsed with ``get_dataframe``; each value of the ``Text``
    column is sent through ``translate_text`` and then ``check_conditions``.

    Returns:
        The parsed DataFrame with an added ``Translated_text`` column.
    """
    cues = get_dataframe(text)

    translations = []
    for cue_text in cues['Text']:
        draft = translate_text(source_language, target_language, cue_text, max_cpl, ideal_cpl)
        checked = check_conditions(
            draft, source_language, target_language, cue_text, max_cpl, ideal_cpl, max_lines=2
        )
        translations.append(checked)

    cues['Translated_text'] = translations
    return cues
161
+
162
+
163
+
164
+
165
+
166
+
167
+
168
def translate(text, source_language, target_language, max_cpl, ideal_cpl):
    """Translate subtitle text and return it rebuilt as .srt content.

    Each cue is written as its number, its timestamp and its translated text
    on separate lines, followed by one empty line.

    Returns:
        The rebuilt subtitle text; an empty string when no cue was found.
    """
    translated = get_translation(text, source_language, target_language, max_cpl, ideal_cpl)

    # One formatted chunk per cue, concatenated in row order.
    cue_chunks = [
        f"{row['Number']}\n{row['Timestamp']}\n{row['Translated_text']}\n\n"
        for _, row in translated.iterrows()
    ]
    return "".join(cue_chunks)
180
+
181
+
182
+
183
+
184
+
185
+
186
+
187
+
188
+
189
+
190
# Interface for the Gradio app: five inputs, passed to translate() in order.
interface = gr.Interface(
    fn=translate,
    inputs=[
        gr.Textbox(label="Paste subtitles here"),
        gr.Textbox(label="Source Language (e.g., en)"),
        gr.Textbox(label="Target Language (e.g., fr)"),
        # step=1 keeps the limits whole numbers, since they are character
        # counts. The explicit values are only starting points; without them
        # the sliders start at their minimum of 1 character per line.
        gr.Slider(minimum=1, maximum=100, step=1, value=42, label="Max Characters Per Line"),
        gr.Slider(minimum=1, maximum=100, step=1, value=37, label="Ideal Characters Per Line"),
    ],
    outputs="text",
    title="Subtitle Translator",
    description="Translate subtitles to another language.",
    # Flagging mode is given as a string; "manual" shows the flag button so
    # users can give feedback, which is what the boolean True was meant to do.
    allow_flagging="manual",
)

# Launch the Gradio app
interface.launch(debug=True)