File size: 1,062 Bytes
1b1d5c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from modules.translator_module import translate_text, NLLB_LANGS, detect_language
import pdfplumber
from docx import Document

def read_txt(file_path):
    """Return the full contents of a text file decoded as UTF-8.

    Args:
        file_path: Path of the file to open for reading.

    Returns:
        The entire file content as a single string.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        contents = handle.read()
    return contents

def read_pdf(file_path):
    """Return the text of every page of a PDF, each page followed by a newline.

    Args:
        file_path: Value passed straight to ``pdfplumber.open``.

    Returns:
        A string made of each page's extracted text followed by ``"\\n"``,
        in page order. A page that yields no text contributes only the
        newline.
    """
    with pdfplumber.open(file_path) as pdf:
        # extract_text() can return None for a page without extractable text
        # (e.g. a scanned image) in some pdfplumber versions; treat that as
        # an empty page instead of failing on `None + "\n"`.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    # Single join instead of repeated `+=` on a growing string.
    return "".join(f"{page_text}\n" for page_text in page_texts)

def read_docx(file_path):
    """Return the text of a .docx document, one paragraph per line.

    Args:
        file_path: Value passed straight to ``Document``.

    Returns:
        The ``text`` of each entry in the document's ``paragraphs``,
        joined with ``"\\n"``.
    """
    document = Document(file_path)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)

def file_translate(file_path, file_type, target_lang="en"):
    """Read a document, detect its language and translate its text.

    Args:
        file_path: Path of the document; forwarded to the matching reader.
        file_type: One of ``"txt"``, ``"pdf"`` or ``"docx"``. Matching ignores
            case, surrounding whitespace and leading dots, so ``"PDF"`` and
            ``".pdf"`` are accepted as well.
        target_lang: Key into ``NLLB_LANGS`` selecting the target language
            (defaults to ``"en"``).

    Returns:
        A dict with the extracted text under ``"original_text"`` and the
        result of ``translate_text`` under ``"translated_text"``.

    Raises:
        ValueError: If ``file_type`` is not one of the supported formats.
        KeyError: Presumably, if the detected language or ``target_lang`` is
            not a key of ``NLLB_LANGS`` (assumes it is a dict — confirm in
            ``modules.translator_module``).
    """
    readers = {"txt": read_txt, "pdf": read_pdf, "docx": read_docx}

    # Only strings are normalized; any other value falls through to the same
    # ValueError the exact-match comparison used to produce.
    reader = None
    if isinstance(file_type, str):
        normalized_type = file_type.strip().lower().lstrip(".")
        reader = readers.get(normalized_type)
    if reader is None:
        raise ValueError("Format non supporté")

    text = reader(file_path)

    # detect_language returns a pair; only its first element is used here,
    # as the key of the source language in NLLB_LANGS.
    detected_lang, _ = detect_language(text)
    source_code = NLLB_LANGS[detected_lang]
    target_code = NLLB_LANGS[target_lang]
    translated = translate_text(text, source_code, target_code)
    return {"original_text": text, "translated_text": translated}