Spaces:
Sleeping
Sleeping
Right to Left Direction in Word to Persian
Browse files
app.py
CHANGED
|
@@ -7,6 +7,8 @@ from fpdf import FPDF # For PDF output
|
|
| 7 |
from pptx import Presentation # For PowerPoint output
|
| 8 |
import subprocess # To use ffmpeg for embedding subtitles
|
| 9 |
import shlex # For better command-line argument handling
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# Load the Whisper model
|
| 12 |
model = whisper.load_model("tiny") # Smaller model for faster transcription
|
|
@@ -81,16 +83,35 @@ def embed_hardsub_in_video(video_file, srt_file, output_video):
|
|
| 81 |
except Exception as e:
|
| 82 |
raise RuntimeError(f"Error running ffmpeg: {e}")
|
| 83 |
|
| 84 |
-
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
| 86 |
doc = Document()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
for i, segment in enumerate(transcription['segments']):
|
| 88 |
text = segment['text']
|
| 89 |
|
| 90 |
if translation_model:
|
| 91 |
text = translate_text(text, tokenizer, translation_model)
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
doc.save(output_file)
|
| 95 |
|
| 96 |
def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
|
|
|
|
| 7 |
from pptx import Presentation # For PowerPoint output
|
| 8 |
import subprocess # To use ffmpeg for embedding subtitles
|
| 9 |
import shlex # For better command-line argument handling
|
| 10 |
+
from docx.oxml.ns import qn
|
| 11 |
+
from docx.oxml import OxmlElement
|
| 12 |
|
| 13 |
# Load the Whisper model
|
| 14 |
model = whisper.load_model("tiny") # Smaller model for faster transcription
|
|
|
|
| 83 |
except Exception as e:
|
| 84 |
raise RuntimeError(f"Error running ffmpeg: {e}")
|
| 85 |
|
| 86 |
+
from docx.oxml.ns import qn
|
| 87 |
+
from docx.oxml import OxmlElement
|
| 88 |
+
|
| 89 |
+
def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
    """Write the transcription to a Word document, one numbered paragraph per segment.

    Args:
        transcription: Mapping with a ``'segments'`` sequence; each segment
            provides its text under the ``'text'`` key.
        output_file: Destination passed to ``Document.save``.
        tokenizer: Forwarded to ``translate_text`` when translating.
        translation_model: When truthy, every segment is passed through
            ``translate_text`` before being written.
        target_language: Language code of the output text. ``"fa"`` (Persian)
            switches every paragraph and run to right-to-left layout.
    """
    doc = Document()

    # Persian is the only language this function lays out right-to-left.
    rtl = target_language == "fa"

    for number, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']

        if translation_model:
            text = translate_text(text, tokenizer, translation_model)

        para = doc.add_paragraph(f"{number}. {text.strip()}")

        if rtl:
            # Paragraph direction is the <w:bidi/> flag inside the paragraph
            # properties (w:pPr). A new paragraph has no w:pPr yet, so it has
            # to be created first rather than assumed to exist.
            para._p.get_or_add_pPr().append(OxmlElement('w:bidi'))

            # Run direction is the <w:rtl/> flag, exposed by python-docx as
            # Font.rtl. Going through the font proxy creates the run
            # properties (w:rPr) on demand; reading run._element.rPr directly
            # yields None on an unformatted run.
            for run in para.runs:
                run.font.rtl = True

    doc.save(output_file)
|
| 116 |
|
| 117 |
def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
|