File size: 4,855 Bytes
9fd9d4b
3a1c01b
 
9fd9d4b
3a1c01b
9fd9d4b
e3ee654
6afbe4c
9fd9d4b
3a1c01b
9fd9d4b
 
 
 
304043d
9fd9d4b
 
 
 
 
 
 
 
304043d
9fd9d4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a1c01b
9fd9d4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304043d
8f80246
9fd9d4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304043d
9fd9d4b
 
 
 
 
 
 
 
 
 
304043d
9fd9d4b
304043d
9fd9d4b
3a1c01b
 
304043d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os, tempfile, uuid, pdfplumber
import gradio as gr
from groq import Groq
from docx import Document                # python-docx

# ── Groq setup ──────────────────────────────────────────────────────────────────
# Groq API client; the key is read from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Chat model used for every translation request.
# NOTE(review): confirm this model id is still served by Groq before deploying.
MODEL_NAME = "llama3-8b-8192"

# Language names offered in the "From" / "To" dropdowns; they are interpolated
# verbatim into the translation prompt.
LANGS = [
    "English",
    "Urdu",
    "Spanish",
    "French",
    "Arabic",
    "Chinese",
    "German",
    "Hindi",
    "Turkish",
]

# ── Core translator (single chunk) ──────────────────────────────────────────────
def llm_translate(chunk: str, src: str, tgt: str) -> str:
    """Translate one chunk of text from ``src`` to ``tgt`` with the Groq chat model.

    Args:
        chunk: Text to translate; it is sent in a single request, so the
            caller is responsible for keeping it within the model's context.
        src: Name of the source language (e.g. ``"English"``).
        tgt: Name of the target language.

    Returns:
        The model's reply with surrounding whitespace stripped.  ``chunk`` is
        returned unchanged when both languages are equal, and empty or
        whitespace-only input is returned as-is without calling the API.
    """
    if src == tgt:          # shortcut
        return chunk
    if not chunk or not chunk.strip():
        # Nothing to translate: skip the API round-trip.  An empty prompt only
        # invites the model to invent text.
        return chunk or ""
    prompt = (
        "You are a professional multilingual translator. "
        f"Translate the following text from {src} to {tgt}. "
        # Keep model chatter ("Here is the translation: ...") out of the result,
        # which is written verbatim into translated documents.
        "Reply with the translation only, without notes or explanations.\n\n"
        f"{chunk}"
    )
    resp = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[{"role": "system", "content": "Translator"}, {"role": "user", "content": prompt}],
        temperature=0.2,
        max_completion_tokens=2048,
    )
    # Guard against a missing message body so we never call .strip() on None.
    return (resp.choices[0].message.content or "").strip()

# ── Document helpers ────────────────────────────────────────────────────────────
MAX_CHARS = 6000                     # stay far below LLM context; tune as needed

def _split_oversized(line, limit):
    """Cut ``line`` into pieces of at most ``limit`` characters.

    Breaks right after a space when one exists inside the window so words stay
    intact; otherwise falls back to a hard cut (e.g. text without spaces).
    """
    pieces = []
    while len(line) > limit:
        cut = line.rfind(" ", 0, limit) + 1
        if cut == 0:                 # no space inside the window
            cut = limit
        pieces.append(line[:cut])
        line = line[cut:]
    if line:
        pieces.append(line)
    return pieces

def _iter_chunks(text, limit):
    """Yield consecutive pieces of ``text``, none longer than ``limit`` characters."""
    buff, size = [], 0               # running size avoids re-summing the buffer per line
    for line in text.splitlines(keepends=True):
        for piece in _split_oversized(line, limit):
            if buff and size + len(piece) > limit:
                yield "".join(buff)
                buff, size = [], 0
            buff.append(piece)
            size += len(piece)
    if buff:
        yield "".join(buff)

def translate_long_text(text, src, tgt, max_chars=MAX_CHARS):
    """Translate arbitrarily long text by splitting it into bounded chunks.

    The text is split on line boundaries into chunks of at most ``max_chars``
    characters; a single line longer than that is cut further (after a space
    when possible) so no request exceeds the limit.  Each chunk goes through
    ``llm_translate`` and the results are joined with newlines.

    Args:
        text: Text to translate; ``None`` or ``""`` yields ``""``.
        src: Name of the source language.
        tgt: Name of the target language.
        max_chars: Upper bound on the size of each chunk sent to the model.

    Returns:
        The translated chunks joined with ``"\\n"``.  When ``src == tgt`` the
        input is returned unchanged.

    Raises:
        ValueError: If ``max_chars`` is not positive.
    """
    if max_chars <= 0:
        raise ValueError("max_chars must be a positive integer")
    if not text or src == tgt:
        # Nothing to do; returning early also avoids re-joining chunks, which
        # would insert extra newlines at chunk boundaries.
        return text or ""
    out = [
        llm_translate(chunk, src, tgt)
        for chunk in _iter_chunks(text, max_chars)
        if chunk.strip()             # never send a blank chunk to the model
    ]
    return "\n".join(out)

def handle_docx(path, src, tgt):
    """Translate a .docx file paragraph by paragraph into a new .docx.

    Only the plain text of each entry in ``Document.paragraphs`` is carried
    over; run formatting and styles are not copied.
    NOTE(review): text inside tables, headers and footers is presumably not
    part of ``paragraphs`` and would be dropped -- confirm against python-docx.

    Args:
        path: Path of the source .docx file.
        src: Name of the source language.
        tgt: Name of the target language.

    Returns:
        Path of the translated document, saved under the system temp directory
        with a random UUID file name.
    """
    doc_in  = Document(path)
    doc_out = Document()
    for para in doc_in.paragraphs:
        text = para.text
        # Blank / whitespace-only paragraphs are just spacing: keep them as
        # empty paragraphs instead of sending whitespace to the LLM, whose
        # reply would otherwise be inserted into the document.
        translated = translate_long_text(text, src, tgt) if text.strip() else ""
        doc_out.add_paragraph(translated)
    out_path = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.docx")
    doc_out.save(out_path)
    return out_path

def handle_pdf(path, src, tgt):
    """Translate the text of a PDF and save the result as a UTF-8 .txt file.

    The text of every page is extracted with pdfplumber (a page with no
    extractable text contributes an empty string), the pages are joined with
    newlines, and the whole text is translated via ``translate_long_text``.

    Returns:
        Path of the written .txt file in the system temp directory.
    """
    with pdfplumber.open(path) as pdf:
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    source_text = "\n".join(page_texts)
    translated_text = translate_long_text(source_text, src, tgt)

    # Output is plain text for simplicity; a PDF could be generated instead.
    file_name = f"{uuid.uuid4()}.txt"
    out_path = os.path.join(tempfile.gettempdir(), file_name)
    with open(out_path, "w", encoding="utf-8") as out_file:
        out_file.write(translated_text)
    return out_path

# ── Gradio UI (Blocks) ──────────────────────────────────────────────────────────
with gr.Blocks(theme=gr.themes.Default(primary_hue="teal", font="poppins")) as demo:
    # NOTE: the emoji in the labels below were restored from mis-decoded
    # (mojibake) text; two of them are a best guess at the original glyph.
    gr.Markdown("# 🌍 AI Multilingual Translator")

    # Tab 1: translate free text typed into a textbox.
    with gr.Tab("✏️ Text"):
        with gr.Row():
            with gr.Column():
                txt_in  = gr.Textbox(lines=5, label="Input")
                src1    = gr.Dropdown(LANGS, value="English", label="From")
                tgt1    = gr.Dropdown(LANGS, value="Urdu",     label="To")
                btn1    = gr.Button("🔁 Translate", variant="primary")
            with gr.Column():
                txt_out = gr.Textbox(lines=5, label="Translation")
        btn1.click(llm_translate, inputs=[txt_in, src1, tgt1], outputs=txt_out)

    # Tab 2: translate an uploaded PDF or DOCX and offer the result for download.
    with gr.Tab("📄 Document"):
        with gr.Row():
            with gr.Column():
                file_in = gr.File(label="Upload PDF or DOCX")
                src2    = gr.Dropdown(LANGS, value="English", label="From")
                tgt2    = gr.Dropdown(LANGS, value="Urdu",     label="To")
                btn2    = gr.Button("🚀 Translate File", variant="primary")
            with gr.Column():
                file_out = gr.File(label="Download translated file")

        def translate_file(file, src, tgt):
            """Dispatch an uploaded file to the matching handler by extension.

            Args:
                file: Value delivered by the ``gr.File`` input -- either a
                    path string or an object exposing the path as ``.name``;
                    ``None`` when nothing was uploaded.
                src: Name of the source language.
                tgt: Name of the target language.

            Returns:
                Path of the translated output file.

            Raises:
                gr.Error: If no file was uploaded or the extension is neither
                    ``.pdf`` nor ``.docx``.
            """
            if file is None:
                raise gr.Error("Please upload a file.")
            # Accept both a plain path string and a file-like wrapper so the
            # handler does not depend on how the File component is configured.
            path = file if isinstance(file, str) else file.name
            ext = os.path.splitext(path)[1].lower()
            if ext == ".docx":
                return handle_docx(path, src, tgt)
            if ext == ".pdf":
                return handle_pdf(path, src, tgt)
            raise gr.Error("Unsupported format (only PDF & DOCX).")

        btn2.click(translate_file, inputs=[file_in, src2, tgt2], outputs=file_out)

    gr.Markdown("---\nMade with ❤️ by Groq & Gradio")

# Launch the Gradio app only when this file is executed as a script, so that
# importing the module does not start it.
if __name__ == "__main__":
    demo.launch()