"""Gradio app that translates free text, DOCX and PDF files with a Groq LLM."""

import os
import tempfile
import uuid

import gradio as gr
import pdfplumber
from docx import Document  # python-docx
from groq import Groq

# ── Groq setup ──────────────────────────────────────────────────────────────────
# The API key is read from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
MODEL_NAME = "llama3-8b-8192"

# Languages offered in the "From" / "To" dropdowns.
LANGS = [
    "English",
    "Urdu",
    "Spanish",
    "French",
    "Arabic",
    "Chinese",
    "German",
    "Hindi",
    "Turkish",
]


# ── Core translator (single chunk) ──────────────────────────────────────────────
def llm_translate(chunk: str, src: str, tgt: str) -> str:
    """Translate one chunk of text from *src* to *tgt* using the chat model.

    Args:
        chunk: Text to translate. It is sent to the model in a single request,
            so callers are responsible for keeping it reasonably small.
        src: Name of the source language (inserted verbatim into the prompt).
        tgt: Name of the target language (inserted verbatim into the prompt).

    Returns:
        The model's reply with surrounding whitespace stripped. When *src*
        equals *tgt*, or when *chunk* is empty / whitespace-only, the input is
        returned unchanged and no API call is made.
    """
    if src == tgt:  # shortcut: nothing to translate
        return chunk
    if not chunk or not chunk.strip():
        # Nothing translatable; skip the API call instead of letting the model
        # invent text for an empty prompt.
        return chunk or ""

    prompt = (
        "You are a professional multilingual translator. "
        f"Translate the following text from {src} to {tgt}.\n\n{chunk}"
    )
    resp = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[
            {"role": "system", "content": "Translator"},
            {"role": "user", "content": prompt},
        ],
        temperature=0.2,
        max_completion_tokens=2048,
    )
    # The message content may be None; treat that as an empty translation
    # rather than crashing on .strip().
    content = resp.choices[0].message.content
    return (content or "").strip()


# ── Document helpers ────────────────────────────────────────────────────────────
MAX_CHARS = 6000  # stay far below LLM context; tune as needed


def translate_long_text(text, src, tgt):
    """Translate *text* in chunks split on line boundaries and re-assemble.

    Lines are accumulated until the running size reaches ``MAX_CHARS``; the
    accumulated chunk is then translated with ``llm_translate``. Lines are
    never split, so a chunk may exceed ``MAX_CHARS`` by up to one line, and a
    single line longer than ``MAX_CHARS`` is sent as one chunk.

    Args:
        text: The full text to translate.
        src: Source language name.
        tgt: Target language name.

    Returns:
        The translated chunks joined with newlines ("" for empty input).
    """
    out, buff = [], []
    size = 0  # running length of buff, so it is not re-summed for every line
    for para in text.splitlines(keepends=True):
        buff.append(para)
        size += len(para)
        if size >= MAX_CHARS:
            out.append(llm_translate("".join(buff), src, tgt))
            buff, size = [], 0
    if buff:  # translate whatever is left after the last full chunk
        out.append(llm_translate("".join(buff), src, tgt))
    return "\n".join(out)


def handle_docx(path, src, tgt):
    """Translate a DOCX file paragraph by paragraph into a new DOCX file.

    Only the text of ``doc_in.paragraphs`` is carried over; each translated
    paragraph is added to a fresh document with default formatting.

    Args:
        path: Path of the input .docx file.
        src: Source language name.
        tgt: Target language name.

    Returns:
        Path of the translated .docx file, written to the system temp dir
        under a random UUID name.
    """
    doc_in = Document(path)
    doc_out = Document()
    for para in doc_in.paragraphs:
        translated = translate_long_text(para.text, src, tgt)
        doc_out.add_paragraph(translated)
    out_path = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.docx")
    doc_out.save(out_path)
    return out_path


def handle_pdf(path, src, tgt):
    """Extract the text of a PDF, translate it and save it as a UTF-8 .txt file.

    Args:
        path: Path of the input .pdf file.
        src: Source language name.
        tgt: Target language name.

    Returns:
        Path of the translated .txt file, written to the system temp dir
        under a random UUID name.
    """
    txt = []
    with pdfplumber.open(path) as pdf:
        for page in pdf.pages:
            # extract_text() may yield nothing for a page; keep an empty
            # placeholder so page order is preserved.
            txt.append(page.extract_text() or "")
    translated_text = translate_long_text("\n".join(txt), src, tgt)
    # save as .txt for simplicity; could generate PDF if you prefer
    out_path = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.txt")
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(translated_text)
    return out_path


# ── Gradio UI (Blocks) ──────────────────────────────────────────────────────────
with gr.Blocks(theme=gr.themes.Default(primary_hue="teal", font="poppins")) as demo:
    gr.Markdown("# 🌍 AI Multilingual Translator")

    with gr.Tab("✏️ Text"):
        with gr.Row():
            with gr.Column():
                txt_in = gr.Textbox(lines=5, label="Input")
                src1 = gr.Dropdown(LANGS, value="English", label="From")
                tgt1 = gr.Dropdown(LANGS, value="Urdu", label="To")
                btn1 = gr.Button("🔁 Translate", variant="primary")
            with gr.Column():
                txt_out = gr.Textbox(lines=5, label="Translation")
        btn1.click(llm_translate, inputs=[txt_in, src1, tgt1], outputs=txt_out)

    with gr.Tab("📄 Document"):
        with gr.Row():
            with gr.Column():
                file_in = gr.File(label="Upload PDF or DOCX")
                src2 = gr.Dropdown(LANGS, value="English", label="From")
                tgt2 = gr.Dropdown(LANGS, value="Urdu", label="To")
                btn2 = gr.Button("🚀 Translate File", variant="primary")
            with gr.Column():
                file_out = gr.File(label="Download translated file")

        def translate_file(file, src, tgt):
            """Dispatch an uploaded file to the DOCX or PDF handler.

            Args:
                file: Value delivered by the ``gr.File`` input (None when
                    nothing was uploaded).
                src: Source language name.
                tgt: Target language name.

            Returns:
                Path of the translated output file.

            Raises:
                gr.Error: If no file was uploaded or the extension is not
                    .pdf / .docx.
            """
            if file is None:
                raise gr.Error("Please upload a file.")
            # NOTE(review): depending on the Gradio version/config the upload
            # may arrive as an object exposing `.name` or as a plain path
            # string; accept both.
            path = getattr(file, "name", file)
            ext = os.path.splitext(path)[1].lower()
            if ext == ".docx":
                return handle_docx(path, src, tgt)
            elif ext == ".pdf":
                return handle_pdf(path, src, tgt)
            else:
                raise gr.Error("Unsupported format (only PDF & DOCX).")

        btn2.click(translate_file, inputs=[file_in, src2, tgt2], outputs=file_out)

    gr.Markdown("---\nMade with ❤️ by Groq & Gradio")

if __name__ == "__main__":
    demo.launch()