# ZunairaHawwar's picture
# Update app.py
# 8f80246 verified
import os, tempfile, uuid, pdfplumber
import gradio as gr
from groq import Groq
from docx import Document # python-docx
# ── Groq setup ──────────────────────────────────────────────────────────────────
# Groq API client; the key is read from the GROQ_API_KEY environment variable
# (os.getenv yields None when the variable is unset).
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Model ID sent with every chat-completion request. It can be overridden with
# the GROQ_MODEL environment variable so the model can be swapped without a
# code change; an unset or empty variable falls back to the original default.
# NOTE(review): hosted model IDs get retired over time — confirm that
# "llama3-8b-8192" is still served before relying on the default.
MODEL_NAME = os.getenv("GROQ_MODEL") or "llama3-8b-8192"

# Language names offered in the UI dropdowns.
LANGS = [
    "English",
    "Urdu",
    "Spanish",
    "French",
    "Arabic",
    "Chinese",
    "German",
    "Hindi",
    "Turkish",
]
# ── Core translator (single chunk) ──────────────────────────────────────────────
def llm_translate(chunk: str, src: str, tgt: str) -> str:
    """Translate one chunk of text with a single chat-completion request.

    Args:
        chunk: Text to translate; it is sent to the model in one request.
        src: Source language name, interpolated into the prompt as-is.
        tgt: Target language name, interpolated into the prompt as-is.

    Returns:
        The model's reply with surrounding whitespace stripped. ``chunk`` is
        returned unchanged when ``src == tgt`` or when it contains nothing but
        whitespace (``""`` for an empty/``None`` chunk), and ``""`` is returned
        when the reply carries no content.
    """
    # Nothing to translate: skip the network round-trip entirely.
    if not chunk or not chunk.strip():
        return chunk or ""
    if src == tgt:  # shortcut
        return chunk

    prompt = (
        "You are a professional multilingual translator. "
        f"Translate the following text from {src} to {tgt}.\n\n{chunk}"
    )
    resp = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[
            {"role": "system", "content": "Translator"},
            {"role": "user", "content": prompt},
        ],
        temperature=0.2,
        max_completion_tokens=2048,
    )
    # Guard against a reply without text so .strip() cannot fail on None.
    reply = resp.choices[0].message.content
    return (reply or "").strip()
# ── Document helpers ────────────────────────────────────────────────────────────
MAX_CHARS = 6000  # stay far below LLM context; tune as needed


def _iter_chunks(text, limit):
    """Yield consecutive pieces of *text*, each at most *limit* characters.

    Pieces end on line boundaries whenever possible. A single line longer than
    *limit* is cut after the last space that fits, or hard-cut at *limit* when
    it contains no such space. Concatenating the pieces reproduces *text*.
    """
    pending, pending_len = [], 0
    for line in text.splitlines(keepends=True):
        while line:
            if len(line) > limit:
                cut = line.rfind(" ", 0, limit) + 1  # keep the space on the left
                if cut <= 0:
                    cut = limit
                piece, line = line[:cut], line[cut:]
            else:
                piece, line = line, ""
            # Flush *before* the buffer would grow past the limit.
            if pending and pending_len + len(piece) > limit:
                yield "".join(pending)
                pending, pending_len = [], 0
            pending.append(piece)
            pending_len += len(piece)  # running total instead of re-summing
    if pending:
        yield "".join(pending)


def translate_long_text(text, src, tgt, max_chars=MAX_CHARS):
    """Translate text of any length by chunking it on line boundaries.

    Args:
        text: The text to translate.
        src: Source language name, forwarded to ``llm_translate``.
        tgt: Target language name, forwarded to ``llm_translate``.
        max_chars: Upper bound on the size of each chunk sent for translation.

    Returns:
        The translated chunks joined with ``"\\n"``. An empty ``text`` yields
        ``""`` and ``src == tgt`` returns ``text`` unchanged; neither case
        calls the translator.

    Raises:
        ValueError: If ``max_chars`` is smaller than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")
    if not text:
        return ""
    # Same language: return the input verbatim rather than re-joining chunks,
    # which would insert extra newlines between them.
    if src == tgt:
        return text
    return "\n".join(
        llm_translate(chunk, src, tgt) for chunk in _iter_chunks(text, max_chars)
    )
def handle_docx(path, src, tgt):
    """Translate a .docx file paragraph by paragraph into a new .docx file.

    Each paragraph's text from the document at ``path`` is translated with
    ``translate_long_text`` and appended to a fresh document. Only the
    ``paragraphs`` collection is read, and only the text is carried over.

    Returns:
        Path of the translated document, saved in the system temp directory
        under a random UUID file name.
    """
    source_doc = Document(path)
    translated_doc = Document()
    for paragraph in source_doc.paragraphs:
        translated_doc.add_paragraph(
            translate_long_text(paragraph.text, src, tgt)
        )

    destination = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.docx")
    translated_doc.save(destination)
    return destination
def handle_pdf(path, src, tgt):
    """Extract the text of a PDF, translate it, and save it as a UTF-8 .txt.

    Text is pulled from every page with pdfplumber (a page that yields no text
    contributes an empty string), the pages are joined with newlines, and the
    result is translated with ``translate_long_text``.

    Returns:
        Path of the translated text file, written to the system temp directory
        under a random UUID file name.
    """
    with pdfplumber.open(path) as pdf:
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    translated_text = translate_long_text("\n".join(page_texts), src, tgt)

    # save as .txt for simplicity; could generate PDF if you prefer
    destination = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.txt")
    with open(destination, "w", encoding="utf-8") as handle:
        handle.write(translated_text)
    return destination
# ── Gradio UI (Blocks) ──────────────────────────────────────────────────────────
# Two-tab UI: free-text translation and PDF/DOCX document translation.
# NOTE(review): several labels below look mis-encoded (UTF-8 emoji read through
# a legacy code page); they are kept exactly as found — confirm against the
# original file before changing them.
with gr.Blocks(theme=gr.themes.Default(primary_hue="teal", font="poppins")) as demo:
    gr.Markdown("# πŸŒβ€―AI Multilingual Translator")

    with gr.Tab("✏️ Text"):
        with gr.Row():
            with gr.Column():
                txt_in = gr.Textbox(lines=5, label="Input")
                src1 = gr.Dropdown(LANGS, value="English", label="From")
                tgt1 = gr.Dropdown(LANGS, value="Urdu", label="To")
                btn1 = gr.Button("πŸ”Β Translate", variant="primary")
            with gr.Column():
                txt_out = gr.Textbox(lines=5, label="Translation")
        # Go through translate_long_text (not llm_translate directly) so text
        # longer than MAX_CHARS is chunked instead of being sent in a single
        # request, and empty input returns without calling the API.
        btn1.click(translate_long_text, inputs=[txt_in, src1, tgt1], outputs=txt_out)

    with gr.Tab("πŸ“„Β Document"):
        with gr.Row():
            with gr.Column():
                file_in = gr.File(label="Upload PDF or DOCX")
                src2 = gr.Dropdown(LANGS, value="English", label="From")
                tgt2 = gr.Dropdown(LANGS, value="Urdu", label="To")
                btn2 = gr.Button("πŸš€Β Translate File", variant="primary")
            with gr.Column():
                file_out = gr.File(label="Download translated file")

        def translate_file(file, src, tgt):
            """Translate an uploaded PDF or DOCX and return the output path.

            Raises:
                gr.Error: If no file was uploaded or the extension is neither
                    ``.pdf`` nor ``.docx``.
            """
            if file is None:
                raise gr.Error("Please upload a file.")
            # The upload may be an object exposing ``.name`` or a plain path
            # string — presumably depends on the Gradio version; accept both.
            path = getattr(file, "name", file)
            ext = os.path.splitext(path)[1].lower()
            if ext == ".docx":
                return handle_docx(path, src, tgt)
            if ext == ".pdf":
                return handle_pdf(path, src, tgt)
            raise gr.Error("Unsupported format (only PDF & DOCX).")

        btn2.click(translate_file, inputs=[file_in, src2, tgt2], outputs=file_out)

    gr.Markdown("---\nMade with ❀️ by Groq & Gradio")
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()