Spaces:
Runtime error
Runtime error
| # import gradio as gr | |
| # Def_04 Docx file to translated_Docx file | |
| from transformers import MarianMTModel, MarianTokenizer | |
| import nltk | |
| from nltk.tokenize import sent_tokenize | |
| from nltk.tokenize import LineTokenizer | |
| nltk.download('punkt') | |
| import math | |
| import torch | |
| from docx import Document | |
| from time import sleep | |
| import docx | |
def getText(filename):
    """Return the text of the document at *filename* as one string.

    The file is opened with ``docx.Document`` and the ``text`` of each entry
    in its ``paragraphs`` collection is joined, in order, with newlines.
    """
    document = docx.Document(filename)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)
| # Def_01 applying a progress bar to a function | |
| import sys | |
def print_progress_bar(index, total, label):
    """Redraw a single-line text progress bar on standard output.

    The line starts with a carriage return so that successive calls
    overwrite each other instead of scrolling.

    Args:
        index: Number of items completed so far.
        total: Total number of items. When it is zero or negative there is
            nothing to do, so the bar is drawn as complete instead of
            raising ``ZeroDivisionError``.
        label: Text written after the percentage.
    """
    n_bar = 50  # Progress bar width in characters
    if total > 0:
        # Clamp so an out-of-range index cannot overflow the fixed bar width.
        progress = min(max(index / total, 0.0), 1.0)
    else:
        progress = 1.0
    filled = '=' * int(n_bar * progress)
    sys.stdout.write('\r')
    sys.stdout.write(f"[{filled:{n_bar}s}] {int(100 * progress)}% {label}")
    sys.stdout.flush()
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
dev = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(dev)

# Checkpoint name shared by the tokenizer and the model; the model is moved
# to the selected device as soon as it is loaded.
mname = "Helsinki-NLP/opus-mt-en-hi"
tokenizer = MarianTokenizer.from_pretrained(mname)
model = MarianMTModel.from_pretrained(mname).to(device)
def _translate_sentences(sentences, batch_size):
    """Translate *sentences* in batches and return the decoded strings in order."""
    translated = []
    for start in range(0, len(sentences), batch_size):
        sent_batch = sentences[start:start + batch_size]
        model_inputs = tokenizer(
            sent_batch,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=500,
        ).to(device)
        # Inference only, so gradient tracking is switched off.
        with torch.no_grad():
            generated = model.generate(**model_inputs)
        translated.extend(
            tokenizer.decode(t, skip_special_tokens=True) for t in generated
        )
    return translated


def btTranslator(docxfile):
    """Translate a .docx file paragraph by paragraph and save the result.

    The document text is read with ``getText``, split into non-blank lines
    (one per paragraph), each paragraph is split into sentences, and the
    sentences are translated in batches with the module-level ``tokenizer``
    and ``model``. Every translated paragraph is appended to a new document
    that is saved as ``Translated_<input file name>`` in the current working
    directory.

    Args:
        docxfile: Path of the source .docx file.

    Returns:
        A tuple ``(translated_paragraphs, output_path)``: the list of
        translated paragraph strings and the path of the saved document.
        (The second item used to be the return value of ``Document.save``,
        i.e. always ``None``.)
    """
    import os  # local import: only needed to derive the output file name

    batch_size = 8
    # LineTokenizer drops blank lines by default, so the text returned by
    # getText can be tokenized directly without re-joining it first.
    paragraphs = LineTokenizer().tokenize(getText(docxfile))
    total = len(paragraphs)

    files = Document()
    translated_paragraphs = []
    if total:
        print_progress_bar(0, total, "Percentage Bar")
    for index, paragraph in enumerate(paragraphs):
        sentences = sent_tokenize(paragraph)
        translated_text = " ".join(_translate_sentences(sentences, batch_size))
        translated_paragraphs.append(translated_text)
        # add_paragraph expects the paragraph text, not the list of sentences.
        files.add_paragraph(translated_text)
        # Report after the work is done so the bar actually reaches 100%.
        print_progress_bar(index + 1, total, "Percentage Bar")
    if total:
        print()  # finish the progress-bar line

    # Use the file name itself rather than slicing off a fixed-length prefix,
    # so the output name is correct whatever directory the input lives in.
    output_path = f"Translated_{os.path.basename(docxfile)}"
    files.save(output_path)
    return translated_paragraphs, output_path
| import gradio as gr | |
# Build the web UI around btTranslator: one text input (the path of the
# source .docx file) and two outputs (the translated text and the saved file).
# The "text" shortcut replaces gr.inputs.Textbox(lines=1): the gr.inputs
# namespace was deprecated in Gradio 3 and removed in Gradio 4, and the
# shortcut form matches the one already used for the outputs.
interface = gr.Interface(
    fn=btTranslator,
    inputs="text",
    outputs=["text", "file"],
    show_progress=True,
)
interface.launch(debug=True)