Spaces:

Farhan1572
/

subtitles_final

Runtime error

App Files Files Community

Farhan1572 committed on Apr 24, 2024

Commit

b1fc497

verified ·

1 Parent(s): 97781c4

Create app.py

Browse files

Files changed (1) hide show

app.py +207 -0

app.py ADDED Viewed

	@@ -0,0 +1,207 @@

+import pandas as pd
+from openai import OpenAI
+import gradio as gr
+import os
# The OpenAI credential is read from the environment variable named "api_key";
# the lookup yields None when that variable is not set.
api_key = os.environ.get("api_key")
# Module-level client used by the translation helpers defined below.
client = OpenAI(api_key=api_key)
def get_dataframe(text):
    """Parse SRT-style subtitle text into a DataFrame with one row per cue.

    A cue starts at a digits-only line that is immediately followed by a
    timestamp line (a line containing ``-->``).  Requiring the timestamp
    right after the index keeps purely numeric dialogue lines (e.g. "1999")
    inside the cue they belong to instead of opening a bogus cue.

    Args:
        text: Raw subtitle content. ``None`` or an empty string produces an
            empty DataFrame.  A leading byte-order mark and ``\\r\\n`` line
            endings are tolerated.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Number`` (cue index as a
        string), ``Timestamp`` (the raw timestamp line, or ``None`` if the
        cue never received one) and ``Text`` (the cue's lines joined with
        ``\\n``, without leading or trailing blank lines).
    """
    # Strip the BOM first: "\ufeff1".isdigit() is False, so without this the
    # first cue of a BOM-prefixed file would be silently dropped.
    cleaned = (text or "").lstrip("\ufeff")
    lines = [raw_line.strip() for raw_line in cleaned.split("\n")]

    numbers = []
    timestamps = []
    text_blocks = []  # one list of text lines per cue

    for position, line in enumerate(lines):
        following = lines[position + 1] if position + 1 < len(lines) else ""

        if line.isdigit() and "-->" in following:
            # Start of a new cue.
            numbers.append(line)
            timestamps.append(None)
            text_blocks.append([])
        elif not numbers:
            # Content before the first cue does not belong to any block.
            continue
        elif "-->" in line:
            timestamps[-1] = line
        else:
            text_blocks[-1].append(line)

    return pd.DataFrame({
        'Number': numbers,
        'Timestamp': timestamps,
        # Lines are already stripped, so stripping "\n" removes exactly the
        # blank separator lines collected around the cue text.
        'Text': ["\n".join(block).strip("\n") for block in text_blocks],
    })
def translate_text(source_language, target_language, TEXT, max_cpl, ideal_cpl):
    """Translate one subtitle cue through the OpenAI chat-completions API.

    Args:
        source_language: Language of ``TEXT`` as entered by the user.
        target_language: Language to translate into.
        TEXT: The cue text to translate.
        max_cpl: Maximum characters per line, interpolated into the prompts.
        ideal_cpl: Ideal characters per line, interpolated into the prompts.

    Returns:
        The content of the first completion choice, or an empty string when
        the API returns no content.
    """
    system_rules = [
        "You are a multilingual translator for movies subtitles.",
        "The number of input characters and output characters should be the same despite the change in language.",
        f"Ideal characters per line is {ideal_cpl} and maximum alloed charactr per line is {max_cpl}",
        # This rule used to be a plain string with an empty "{}" placeholder,
        # so the model was never told the actual limit.
        f"In response, maximum characters per line is {max_cpl}",
        "Maximum two lines are allowed for the response",
        "You MUST USE NEW LINE WHERE ALREADY USED IN THE GIVEN TEXT",
        "YOU MUST KEEP ALL THE SEPARATORS IN THE RIGHT PLACE WHERE ALREADY PLACED IN THE ORIGINAL TEXT",
        "You SHOULD NOT SKIP ANY LINE OR ANY INFORMATION",
        "The Tranlation should be error proof",
    ]
    user_prompt = f"""Translate the text from {source_language} language to {target_language} language.:
        \nTEXT: {TEXT}
        \nREMEMBER: MAXIMUM CHARACTERS PER LINE IN RESPONSE ARE {max_cpl}
        \nREMEMBER: MAXIMUM LINES ALLOWED IN THE RESPONSE IS 02
        So make the translation accordingly so it accomodates the limit
        NOTE: THE OUTPUT SHOULD BE IN  {target_language} language.
        """

    messages = [{"role": "system", "content": rule} for rule in system_rules]
    messages.append({"role": "user", "content": user_prompt})

    response = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        temperature=0.1,
        messages=messages,
    )
    # The message content may be None; normalise so callers can always treat
    # the result as a string (the caller splits it on newlines).
    return response.choices[0].message.content or ""
def translate_text_correct(source_language, target_language, TEXT, max_cpl, ideal_cpl):
    """Ask the model to shorten ``TEXT`` to fewer than ``max_cpl`` characters.

    ``source_language``, ``target_language`` and ``ideal_cpl`` are accepted
    but not used when building the request.  Returns the content of the
    first completion choice.
    """
    print("from the correction fucntion")

    system_prompts = (
        "You reduce the size of the sentences.",
        f"The maximuim output sentecne should not be more than {max_cpl} characters.",
    )
    user_prompt = f"""
        DO NOT CHANGE THE LANGUAGE
         Reduce the size of the text to less than {max_cpl} even if there is a change in meaning.
         \nWrite the sentence in shortest possible manner
        \nTEXT: {TEXT}
        """

    conversation = [{"role": "system", "content": prompt} for prompt in system_prompts]
    conversation.append({"role": "user", "content": user_prompt})

    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        temperature=0.1,
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def _shorten_line(line, source_language, target_language, max_cpl, ideal_cpl, attempts_left):
    """Return ``line`` as a list of lines, shortened within the attempt budget."""
    if len(line) <= max_cpl or attempts_left <= 0:
        return [line]
    shortened = translate_text_correct(source_language, target_language, line, max_cpl, ideal_cpl) or ""
    fitted = []
    # The shortened text may itself span several lines; each piece is
    # checked again, sharing the remaining attempt budget.
    for piece in shortened.split("\n"):
        fitted.extend(
            _shorten_line(piece, source_language, target_language, max_cpl, ideal_cpl, attempts_left - 1)
        )
    return fitted


def check_conditions(response, source_language, target_language, text, max_cpl, ideal_cpl, max_lines=2, max_attempts=3):
    """Make every line of ``response`` respect the characters-per-line limit.

    Lines longer than ``max_cpl`` are sent to ``translate_text_correct`` to
    be shortened.  A line of exactly ``max_cpl`` characters is accepted,
    since ``max_cpl`` is the maximum allowed length.

    Args:
        response: Translated cue text; ``None`` is treated as empty.
        source_language: Forwarded to ``translate_text_correct``.
        target_language: Forwarded to ``translate_text_correct``.
        text: Original cue text. Unused; kept for interface compatibility.
        max_cpl: Maximum number of characters allowed on one line.
        ideal_cpl: Forwarded to ``translate_text_correct``.
        max_lines: Unused; kept for interface compatibility. The number of
            lines is not enforced here.
        max_attempts: Upper bound on shortening requests per original line.
            Without a bound, a model that never gets under the limit causes
            endless recursion and API calls.  When the budget is exhausted
            the last candidate is kept as-is (best effort).

    Returns:
        The response with over-long lines replaced by their shortened form,
        joined with ``\\n``.
    """
    checked_lines = []
    for line in (response or "").split("\n"):
        checked_lines.extend(
            _shorten_line(line, source_language, target_language, max_cpl, ideal_cpl, max_attempts)
        )
    return "\n".join(checked_lines)
def get_translation(text, source_language, target_language, max_cpl, ideal_cpl):
    """Translate every cue in ``text`` and return the cues as a DataFrame.

    The subtitle text is parsed with ``get_dataframe``; each cue's ``Text``
    is translated with ``translate_text`` and then passed through
    ``check_conditions``.  The results are stored in a new
    ``Translated_text`` column of the returned DataFrame.
    """
    subtitles = get_dataframe(text)

    translations = []
    for cue_text in subtitles['Text']:
        draft = translate_text(source_language, target_language, cue_text, max_cpl, ideal_cpl)
        checked = check_conditions(
            draft, source_language, target_language, cue_text, max_cpl, ideal_cpl, max_lines=2
        )
        translations.append(checked)

    subtitles['Translated_text'] = translations
    return subtitles
def translate(text, source_language, target_language, max_cpl, ideal_cpl):
    """Translate pasted subtitles and return them as .srt-formatted text.

    Each cue is rendered as its number, its timestamp line and its
    translated text, followed by a blank line.
    """
    translated = get_translation(text, source_language, target_language, max_cpl, ideal_cpl)

    cue_columns = zip(
        translated['Number'],
        translated['Timestamp'],
        translated['Translated_text'],
    )
    rendered_cues = [
        f"{number}\n{timestamp}\n{body}\n\n"
        for number, timestamp, body in cue_columns
    ]
    return "".join(rendered_cues)
# Interface for the Gradio app
interface = gr.Interface(
    fn=translate,
    inputs=[
        gr.Textbox(label="Paste subtitles here"),
        gr.Textbox(label="Source Language (e.g., en)"),
        gr.Textbox(label="Target Language (e.g., fr)"),
        # An explicit step keeps the character limits whole numbers; the
        # values are interpolated into the prompts and compared with len().
        gr.Slider(minimum=1, maximum=100, step=1, label="Max Characters Per Line"),
        gr.Slider(minimum=1, maximum=100, step=1, label="Ideal Characters Per Line"),
    ],
    outputs="text",
    title="Subtitle Translator",
    description="Translate subtitles to another language.",
    # Manual flagging lets users submit feedback; the string form replaces the
    # deprecated boolean True.
    allow_flagging="manual",
)
# Launch the Gradio app
interface.launch(debug=True)