|
|
import os, tempfile, uuid, pdfplumber |
|
|
import gradio as gr |
|
|
from groq import Groq |
|
|
from docx import Document |
|
|
|
|
|
|
|
|
# Groq API client; the key is read from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Chat model used for every translation request. It can be overridden through
# the GROQ_MODEL environment variable so the model can be swapped without a
# code change; the default is unchanged.
# NOTE(review): hosted model IDs get retired over time -- confirm the default
# below is still served by Groq.
MODEL_NAME = os.getenv("GROQ_MODEL", "llama3-8b-8192")

# Language names offered in the "From" / "To" dropdowns and interpolated
# verbatim into the translation prompt.
LANGS = [
    "English",
    "Urdu",
    "Spanish",
    "French",
    "Arabic",
    "Chinese",
    "German",
    "Hindi",
    "Turkish",
]
|
|
|
|
|
|
|
|
def llm_translate(chunk: str, src: str, tgt: str) -> str:
    """Translate ``chunk`` from ``src`` to ``tgt`` using the Groq chat model.

    Args:
        chunk: Text to translate.
        src: Name of the source language (e.g. ``"English"``).
        tgt: Name of the target language.

    Returns:
        The model's reply with surrounding whitespace stripped. ``chunk`` is
        returned untouched when both languages are the same or when it holds
        no visible text (``""`` for ``None``); no API request is made in
        those cases.
    """
    if src == tgt:
        return chunk
    if not chunk or not chunk.strip():
        # Nothing to translate: skip the request rather than send the model
        # an empty prompt.
        return chunk or ""

    prompt = (
        "You are a professional multilingual translator. "
        f"Translate the following text from {src} to {tgt}.\n\n{chunk}"
    )
    resp = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[
            {"role": "system", "content": "Translator"},
            {"role": "user", "content": prompt},
        ],
        temperature=0.2,
        max_completion_tokens=2048,
    )
    # Guard against a reply whose content is None so .strip() cannot raise.
    content = resp.choices[0].message.content
    return (content or "").strip()
|
|
|
|
|
|
|
|
# Default upper bound on the number of characters sent to the model at once.
MAX_CHARS = 6000


def _split_oversized(line, limit):
    """Yield pieces of ``line`` that are each at most ``limit`` characters.

    Breaks right after the last space inside the window when there is one so
    words stay intact; otherwise falls back to a hard cut at ``limit``.
    """
    while len(line) > limit:
        cut = line.rfind(" ", 0, limit) + 1
        if cut == 0:
            cut = limit
        yield line[:cut]
        line = line[cut:]
    if line:
        yield line


def translate_long_text(text, src, tgt, max_chars=MAX_CHARS, translate=None):
    """Translate ``text`` in chunks of at most ``max_chars`` characters.

    The text is split on line boundaries (line endings are kept inside the
    chunk). Lines are packed greedily into chunks that never exceed
    ``max_chars``; a single line longer than the limit is itself split,
    preferably after a space. Each chunk is translated separately and the
    results are joined with ``"\\n"``.

    Args:
        text: Text to translate.
        src: Name of the source language.
        tgt: Name of the target language.
        max_chars: Maximum chunk size in characters (defaults to
            ``MAX_CHARS``).
        translate: Callable ``(chunk, src, tgt) -> str`` used for each chunk;
            defaults to ``llm_translate``.

    Returns:
        The translated chunks joined with newlines; ``""`` for empty input.

    Raises:
        ValueError: If ``max_chars`` is smaller than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")
    if translate is None:
        translate = llm_translate

    chunks, buff, size = [], [], 0
    for line in text.splitlines(keepends=True):
        for piece in _split_oversized(line, max_chars):
            # Flush *before* the limit would be exceeded, so no chunk is ever
            # larger than max_chars. The running ``size`` avoids re-summing
            # the buffer on every line.
            if buff and size + len(piece) > max_chars:
                chunks.append("".join(buff))
                buff, size = [], 0
            buff.append(piece)
            size += len(piece)
    if buff:
        chunks.append("".join(buff))

    return "\n".join(translate(chunk, src, tgt) for chunk in chunks)
|
|
|
|
|
def handle_docx(path, src, tgt):
    """Translate a DOCX file paragraph by paragraph and return the new path.

    The text of every paragraph in the document at ``path`` is passed through
    ``translate_long_text`` and appended as a paragraph to a fresh document.
    The result is saved in the system temp directory under a random UUID
    file name ending in ``.docx``.
    """
    source_doc = Document(path)
    translated_doc = Document()

    for paragraph in source_doc.paragraphs:
        translated_doc.add_paragraph(
            translate_long_text(paragraph.text, src, tgt)
        )

    target = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.docx")
    translated_doc.save(target)
    return target
|
|
|
|
|
def handle_pdf(path, src, tgt):
    """Translate the text of a PDF and return the path of a UTF-8 ``.txt`` file.

    Text is extracted page by page, joined with newlines, translated with
    ``translate_long_text`` and written to a randomly named file in the
    system temp directory.
    """
    with pdfplumber.open(path) as pdf:
        # A page whose extract_text() result is falsy contributes "".
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    translated_text = translate_long_text("\n".join(page_texts), src, tgt)

    target = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.txt")
    with open(target, "w", encoding="utf-8") as handle:
        handle.write(translated_text)
    return target
|
|
|
|
|
|
|
|
# Gradio UI: one tab for free-text translation, one for PDF/DOCX uploads.
# NOTE(review): the emoji in the labels below were mis-decoded in the source
# (UTF-8 bytes read as a single-byte code page) and are a best-effort
# reconstruction -- confirm against the intended design.
with gr.Blocks(theme=gr.themes.Default(primary_hue="teal", font="poppins")) as demo:
    gr.Markdown("# 🌐 AI Multilingual Translator")

    with gr.Tab("✍️ Text"):
        with gr.Row():
            with gr.Column():
                txt_in = gr.Textbox(lines=5, label="Input")
                src1 = gr.Dropdown(LANGS, value="English", label="From")
                tgt1 = gr.Dropdown(LANGS, value="Urdu", label="To")
                btn1 = gr.Button("🚀 Translate", variant="primary")
            with gr.Column():
                txt_out = gr.Textbox(lines=5, label="Translation")

        btn1.click(llm_translate, inputs=[txt_in, src1, tgt1], outputs=txt_out)

    with gr.Tab("📄 Document"):
        with gr.Row():
            with gr.Column():
                file_in = gr.File(label="Upload PDF or DOCX")
                src2 = gr.Dropdown(LANGS, value="English", label="From")
                tgt2 = gr.Dropdown(LANGS, value="Urdu", label="To")
                btn2 = gr.Button("🚀 Translate File", variant="primary")
            with gr.Column():
                file_out = gr.File(label="Download translated file")

        def translate_file(file, src, tgt):
            """Dispatch an uploaded file to the DOCX or PDF handler.

            Returns the path of the translated file. Raises ``gr.Error`` when
            no file was uploaded or the extension is neither ``.docx`` nor
            ``.pdf``.
            """
            if file is None:
                raise gr.Error("Please upload a file.")

            # Accept both an object exposing ``.name`` and a plain path string
            # (presumably depends on the Gradio version / File ``type`` --
            # verify against the installed release).
            path = getattr(file, "name", file)
            ext = os.path.splitext(path)[1].lower()

            if ext == ".docx":
                return handle_docx(path, src, tgt)
            if ext == ".pdf":
                return handle_pdf(path, src, tgt)
            raise gr.Error("Unsupported format (only PDF & DOCX).")

        btn2.click(translate_file, inputs=[file_in, src2, tgt2], outputs=file_out)

    gr.Markdown("---\nMade with ❤️ by Groq & Gradio")
|
|
|
|
|
# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|
|
|