# ZomiTranslator / app.py
# Juna190825's picture
# Update app.py
# 33f430f verified
# import gradio as gr
# from fastapi import FastAPI
# from fastapi.middleware.cors import CORSMiddleware
# from pydantic import BaseModel
# from gradio_client import Client
# import uvicorn
# import os
# ############ logging, and committing translation ##############
# from huggingface_hub import HfApi, CommitOperationAdd
# import time
# from datetime import datetime
# from langdetect import detect
# from huggingface_hub import update_dataset_card
# import json
# import threading
# import queue
# import hashlib
# HF_DATASET = "Juna190825/zomi-translation-logs"
# HF_TOKEN = os.getenv("HF_TOKEN")
# ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD")
# api = HfApi(token=HF_TOKEN)
# log_queue = queue.Queue()
# LOG_DIR = "/data"
# BUFFER_FILE = os.path.join(LOG_DIR, "log_buffer.jsonl")
# COMMIT_INTERVAL_SECONDS = 900 # every 15 minutes
# def append_log(input_text: str, output_text: str):
# os.makedirs(LOG_DIR, exist_ok=True)
# record = {
# "ts": datetime.utcnow().isoformat() + "Z",
# "src_text": input_text[:500],
# "tgt_text": output_text[:500],
# "app": "zomi-translator",
# "version": "1.0.0"
# }
# with open(BUFFER_FILE, "a", encoding="utf-8") as f:
# f.write(json.dumps(record, ensure_ascii=False) + "\n")
# def commit_logs_to_hf(manual=False):
# if not HF_TOKEN or not os.path.exists(BUFFER_FILE):
# return "No logs to commit."
# if not os.path.exists(BUFFER_FILE) or os.path.getsize(BUFFER_FILE) < 10:
# return "No new logs."
# with open(BUFFER_FILE, "r", encoding="utf-8") as f:
# data = f.read().strip()
# if not data:
# return "No new logs."
# date_str = datetime.utcnow().strftime("%Y-%m-%d")
# repo_path = f"logs/{date_str}.jsonl"
# api.upload_file(
# path_or_fileobj=BUFFER_FILE,
# path_in_repo=repo_path,
# repo_id=HF_DATASET,
# repo_type="dataset",
# commit_message="Manual log commit" if manual else "Auto daily log commit"
# )
# # Clear buffer after successful commit
# open(BUFFER_FILE, "w").close()
# return f"Committed logs to {repo_path}"
# def update_dataset_card_info():
# api = HfApi()
# # Build the README.md content dynamically
# readme = f"""
# # Zomi Translator Logs
# **License:** MIT
# **Languages:** Zomi, English
# Daily logs of Zomi ↔ English translations.
# """
# api.create_commit(
# repo_id=HF_DATASET,
# repo_type="dataset",
# commit_message="Update dataset card metadata",
# operations=[
# CommitOperationAdd(
# path_in_repo="README.md",
# path_or_fileobj=readme.encode("utf-8")
# )
# ]
# )
# def manual_commit(password: str):
# if not ADMIN_PASSWORD:
# return "❌ Admin password not configured."
# if password != ADMIN_PASSWORD:
# return "❌ Invalid admin password."
# result = commit_logs_to_hf(manual=True)
# return f"✅ {result}"
# def append_log_async(input_text, output_text, direction):
# log_queue.put({
# "ts": datetime.utcnow().isoformat() + "Z",
# # "src_text_hash": hashlib.sha256(input_text.encode("utf-8")).hexdigest(),
# "src_text": input_text,
# "tgt_text": output_text[:500],
# "direction": direction,
# "app": "zomi-translator",
# "version": "1.0.0"
# })
# def async_commit_worker():
# buffer = []
# while True:
# try:
# # Collect up to 50 logs or 60 seconds
# start = time.time()
# while len(buffer) < 50 and (time.time() - start < 60):
# try:
# buffer.append(log_queue.get(timeout=1))
# except queue.Empty:
# pass
# if buffer:
# os.makedirs(LOG_DIR, exist_ok=True)
# date_str = datetime.utcnow().strftime("%Y-%m-%d")
# batch_file = os.path.join(LOG_DIR, f"{date_str}.jsonl")
# with open(batch_file, "a", encoding="utf-8") as f:
# for record in buffer:
# f.write(json.dumps(record, ensure_ascii=False) + "\n")
# commit_logs_to_hf() # commits the batch file
# update_dataset_card_info()
# buffer.clear()
# except Exception as e:
# print("Async commit failed:", e)
# ###############################################################
# app = FastAPI()
# # Enable CORS
# app.add_middleware(
# CORSMiddleware,
# allow_origins=["*"],
# allow_credentials=True,
# allow_methods=["*"],
# allow_headers=["*"],
# )
# # Initialize client once
# translator_client = Client("Chatboong/Gemini_Translator")
# def call_translator(text: str):
# msg = f"Translate Zomi to English, if it is English translate it to Zomi: '{text}'\n"
# stream = translator_client.predict(
# message=msg,
# lang="English",
# is_streaming=True,
# api_name="/chat",
# )
# output = ""
# for chunk in stream:
# output += str(chunk)
# # Remove prefix
# prefix = "Translate Zomi to English, if it is English translate it to Zomi: "
# prefix2 = "Zomi pan English in tei in, English ahih leh Zomi in tei in: "
# prefix3 = 'Zomi-in tei in, English ahih leh Zomi-in tei in: '
# if output.startswith(prefix):
# output = output[len(prefix):].strip()
# # Remove surrounding quotes
# if (output.startswith('"') and output.endswith('"')) or (output.startswith("'") and output.endswith("'")):
# output = output[1:-1].strip()
# elif output.startswith(prefix2):
# output = output[len(prefix2):].strip()
# # Remove surrounding quotes
# if (output.startswith('"') and output.endswith('"')) or (output.startswith("'") and output.endswith("'")):
# output = output[1:-1].strip()
# elif output.startswith(prefix3):
# output = output[len(prefix3):].strip()
# # Remove surrounding quotes
# if (output.startswith('"') and output.endswith('"')) or (output.startswith("'") and output.endswith("'")):
# output = output[1:-1].strip()
# append_log(text, output)
# return output
# def detect_direction(text: str) -> str:
# try:
# lang = detect(text)
# if lang == "en":
# return "en-zomi"
# else:
# return "zomi-en"
# except:
# return "en-zomi"
# def translate_zomi(text: str):
# direction = detect_direction(text)
# output = call_translator(text) # your existing streaming code
# append_log_async(text, output, direction)
# return output
# class ChatRequest(BaseModel):
# message: str
# @app.post("/chat")
# async def chat_api(req: ChatRequest):
# translation = translate_zomi(req.message)
# return {"translation": translation}
# def chat_ui(message: str):
# return translate_zomi(message)
# with gr.Blocks() as demo:
# gr.Markdown("### Zomi Translator")
# inp = gr.Textbox(label="Input")
# out = gr.Textbox(label="Output")
# inp.submit(chat_ui, inp, out)
# if ADMIN_PASSWORD:
# gr.Markdown("### Admin (Protected)")
# admin_pw = gr.Textbox(
# label="Admin Password",
# type="password",
# placeholder="Enter admin password"
# )
# commit_btn = gr.Button("📦 Commit Logs Now")
# status = gr.Textbox(label="Commit Status", interactive=False)
# commit_btn.click(
# manual_commit,
# inputs=admin_pw,
# outputs=status
# ).then(
# lambda: "",
# None,
# admin_pw
# )
# if HF_TOKEN:
# threading.Thread(target=async_commit_worker, daemon=True).start()
# # Mount Gradio under the FastAPI app
# app = gr.mount_gradio_app(app, demo, path="/")
# if __name__ == "__main__":
# # Respect PORT env var (used by Hugging Face Spaces)
# port = int(os.getenv("PORT", "7860"))
# uvicorn.run(app, host="0.0.0.0", port=port)
import asyncio
import hashlib
import hmac
import json
import os
import queue
import threading
import time
from datetime import datetime

import gradio as gr
import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from gradio_client import Client
from huggingface_hub import HfApi, CommitOperationAdd, RepoCard
from langdetect import detect
from pydantic import BaseModel

############ logging, and committing translation ##############
# --- Hugging Face Hub configuration ---
# Dataset repository that receives the uploaded translation logs.
HF_DATASET = "Juna190825/zomi-translation-logs"
# Hub access token taken from the environment; None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Password checked by manual_commit(); None when the variable is unset.
ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD")
# Hub client constructed with HF_TOKEN.
api = HfApi(token=HF_TOKEN)

# --- Local log storage ---
# Records put here by append_log_async() are drained by async_commit_worker().
log_queue = queue.Queue()
# Directory that holds the local log files.
LOG_DIR = "/data"
# JSONL file that append_log() appends to and commit_logs_to_hf() uploads.
BUFFER_FILE = os.path.join(LOG_DIR, "log_buffer.jsonl")
# NOTE(review): not referenced by any code visible in this file - confirm
# whether it is still needed.
COMMIT_INTERVAL_SECONDS = 15 * 60  # every 15 minutes
def append_log(input_text: str, output_text: str):
    """Append one translation record to the local JSONL buffer.

    LOG_DIR is created if it does not exist yet. Both texts are cut to their
    first 500 characters, and the record is written as a single JSON line at
    the end of BUFFER_FILE.

    Args:
        input_text: Text that was submitted for translation.
        output_text: Translation that was produced for it.
    """
    os.makedirs(LOG_DIR, exist_ok=True)
    entry = dict(
        ts=datetime.utcnow().isoformat() + "Z",
        src_text=input_text[:500],
        tgt_text=output_text[:500],
        app="zomi-translator",
        version="1.0.0",
    )
    with open(BUFFER_FILE, mode="a", encoding="utf-8") as buffer_file:
        serialized = json.dumps(entry, ensure_ascii=False)
        buffer_file.write(serialized + "\n")
def commit_logs_to_hf(manual=False):
    """Upload the buffered translation logs to the Hub dataset as a new file.

    The current content of BUFFER_FILE is uploaded to HF_DATASET under
    ``logs/<manual|auto>_<date>_<timestamp>.jsonl``. After a successful upload
    only the uploaded bytes are removed from the buffer, so records appended
    while the upload was running stay queued for the next commit.

    Args:
        manual: Truthy when triggered by an administrator; selects the
            ``manual_`` file prefix and commit message instead of ``auto_``.

    Returns:
        A status string: "No logs to commit." when HF_TOKEN is unset or the
        buffer file does not exist, "No new logs." when the buffer holds fewer
        than 10 bytes or only whitespace, otherwise
        "Committed logs to <repo path>".

    Raises:
        Any exception raised by ``api.upload_file``; the buffer is left
        untouched in that case.
    """
    if not HF_TOKEN:
        return "No logs to commit."
    try:
        with open(BUFFER_FILE, "rb") as f:
            payload = f.read()
    except FileNotFoundError:
        return "No logs to commit."
    if len(payload) < 10 or not payload.strip():
        return "No new logs."

    # Take the clock once so the date and the timestamp in the file name can
    # never disagree (e.g. when the call straddles midnight).
    now = datetime.utcnow()
    date_str = now.strftime("%Y-%m-%d")
    # Timestamp keeps the file name unique across commits on the same day.
    timestamp = now.strftime("%Y%m%d-%H%M%S")
    kind, label = ("manual", "Manual") if manual else ("auto", "Auto")
    repo_path = f"logs/{kind}_{date_str}_{timestamp}.jsonl"

    # Upload the snapshot that was read above, not the live file, so that what
    # gets removed below is exactly what was committed.
    api.upload_file(
        path_or_fileobj=payload,
        path_in_repo=repo_path,
        repo_id=HF_DATASET,
        repo_type="dataset",
        commit_message=f"{label} log commit {timestamp}",
    )

    # Drop only the uploaded prefix. This narrows the loss window from the
    # whole upload duration to this short rewrite; it is not fully race-free
    # because writers do not share a lock with this function.
    with open(BUFFER_FILE, "r+b") as f:
        f.seek(len(payload))
        remainder = f.read()
        f.seek(0)
        f.write(remainder)
        f.truncate()
    return f"Committed logs to {repo_path}"
def update_dataset_card_info():
    """Upload the dataset card (README.md) of HF_DATASET.

    The card text is fixed; it is wrapped in a ``RepoCard`` and its rendered
    content is uploaded as ``README.md`` to the dataset repository.

    Raises:
        Any exception raised by ``api.upload_file``.
    """
    # Plain string on purpose: the text has no placeholders to interpolate.
    # The lines start at column 0 because they are Markdown content.
    readme_content = """
# Zomi Translator Logs
**License:** MIT
**Languages:** Zomi, English
Daily logs of Zomi ↔ English translations.
"""
    card = RepoCard(readme_content)
    # Reuse the module-level client that was built with HF_TOKEN instead of
    # creating a second client that relies on implicitly discovered
    # credentials.
    api.upload_file(
        path_or_fileobj=card.content.encode("utf-8"),
        path_in_repo="README.md",
        repo_id=HF_DATASET,
        repo_type="dataset",
    )
def manual_commit(password: str):
    """Commit the buffered logs on request, guarded by the admin password.

    Args:
        password: Password supplied by the user; an empty or missing value is
            treated as a wrong password.

    Returns:
        A status string: an error text prefixed with "❌" when no admin
        password is configured or the supplied one does not match, otherwise
        "✅" followed by the status returned by ``commit_logs_to_hf``.

    Raises:
        Any exception raised by ``commit_logs_to_hf(manual=True)``.
    """
    if not ADMIN_PASSWORD:
        return "❌ Admin password not configured."
    # Constant-time comparison so response timing does not reveal how many
    # leading characters of a guess were correct. Both sides are encoded to
    # bytes because compare_digest rejects non-ASCII str arguments.
    supplied = (password or "").encode("utf-8")
    expected = ADMIN_PASSWORD.encode("utf-8")
    if not hmac.compare_digest(supplied, expected):
        return "❌ Invalid admin password."
    result = commit_logs_to_hf(manual=True)
    return f"✅ {result}"
def append_log_async(input_text, output_text, direction):
    """Queue one translation record on ``log_queue``.

    The source text is stored in full; the translation is cut to its first
    500 characters.

    Args:
        input_text: Text that was submitted for translation.
        output_text: Translation that was produced for it.
        direction: Translation direction label stored with the record.
    """
    entry = {
        "ts": datetime.utcnow().isoformat() + "Z",
        "src_text": input_text,
        "tgt_text": output_text[:500],
        "direction": direction,
        "app": "zomi-translator",
        "version": "1.0.0",
    }
    log_queue.put(entry)
def async_commit_worker():
    """Drain ``log_queue`` forever, persisting batches and triggering commits.

    Each pass collects queued records until 50 are buffered or 60 seconds
    have elapsed. A non-empty batch is appended to
    ``LOG_DIR/<YYYY-MM-DD>.jsonl``, then ``commit_logs_to_hf()`` and
    ``update_dataset_card_info()`` are called. Failures are printed and the
    loop continues. This function never returns.
    """
    buffer = []
    while True:
        try:
            # Collect up to 50 logs or 60 seconds
            start = time.time()
            while len(buffer) < 50 and (time.time() - start < 60):
                try:
                    buffer.append(log_queue.get(timeout=1))
                except queue.Empty:
                    pass
            if not buffer:
                continue

            os.makedirs(LOG_DIR, exist_ok=True)
            date_str = datetime.utcnow().strftime("%Y-%m-%d")
            batch_file = os.path.join(LOG_DIR, f"{date_str}.jsonl")
            with open(batch_file, "a", encoding="utf-8") as f:
                f.writelines(
                    json.dumps(record, ensure_ascii=False) + "\n"
                    for record in buffer
                )
            # The records are on disk now, so forget them before any network
            # call. Clearing only after a successful commit re-appended the
            # same records on every failed pass and, with a full buffer,
            # retried in a tight loop.
            buffer.clear()

            # NOTE(review): commit_logs_to_hf() uploads BUFFER_FILE, not the
            # batch file written above - confirm whether the batch file is
            # meant to be uploaded as well.
            commit_logs_to_hf()
            update_dataset_card_info()
        except Exception as e:
            print("Async commit failed:", e)
            # Brief pause so a persistent local failure cannot spin the loop.
            time.sleep(5)
###############################################################
# FastAPI application object; the /chat route and the Gradio UI are attached
# to it further down in this file.
app = FastAPI()

# Enable CORS for every origin, method and header, with credentials allowed.
# NOTE(review): wildcard origins together with allow_credentials=True is a
# very permissive policy - confirm it is intended.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)

# Initialize client once; it is shared by every call_translator() invocation.
translator_client = Client("Chatboong/Gemini_Translator")
# Instruction texts that can appear at the start of the model output and are
# stripped from it, tried in this order.
_PROMPT_ECHOES = (
    "Translate Zomi to English, if it is English translate it to Zomi: ",
    "Zomi pan English in tei in, English ahih leh Zomi in tei in: ",
    'Zomi-in tei in, English ahih leh Zomi-in tei in: ',
)


def _strip_prompt_echo(output: str) -> str:
    """Remove a leading echoed instruction and the quotes around the rest."""
    for prefix in _PROMPT_ECHOES:
        if not output.startswith(prefix):
            continue
        output = output[len(prefix):].strip()
        # Remove surrounding quotes (a matching pair of " or ').
        if (output.startswith('"') and output.endswith('"')) or (
            output.startswith("'") and output.endswith("'")
        ):
            output = output[1:-1].strip()
        # Only the first matching prefix is handled.
        break
    return output


def call_translator(text: str):
    """Translate *text* via the shared ``translator_client`` and log the pair.

    The text is embedded in a fixed instruction and sent to the client's
    ``/chat`` endpoint. The pieces of the result are joined, a leading echoed
    instruction (and the quotes around the remainder) is stripped, and the
    input/output pair is recorded with ``append_log``.

    Args:
        text: Text to translate.

    Returns:
        The cleaned translation string.
    """
    msg = f"Translate Zomi to English, if it is English translate it to Zomi: '{text}'\n"
    stream = translator_client.predict(
        message=msg,
        lang="English",
        is_streaming=True,
        api_name="/chat",
    )
    # Join once instead of growing a string with += inside a loop.
    output = "".join(str(chunk) for chunk in stream)
    output = _strip_prompt_echo(output)
    append_log(text, output)
    return output
def detect_direction(text: str) -> str:
    """Return the translation direction label for *text*.

    Args:
        text: Text whose language is detected with ``detect``.

    Returns:
        "en-zomi" when the detected language code is "en" or when detection
        fails, otherwise "zomi-en".
    """
    try:
        lang = detect(text)
    except Exception:
        # Best-effort fallback when detection fails. Catching Exception rather
        # than using a bare except lets KeyboardInterrupt and SystemExit
        # propagate.
        return "en-zomi"
    return "en-zomi" if lang == "en" else "zomi-en"
def translate_zomi(text: str):
    """Translate *text* and queue a log record for the result.

    Args:
        text: Text to translate.

    Returns:
        The translation returned by ``call_translator``.
    """
    lang_pair = detect_direction(text)
    translated = call_translator(text)
    # NOTE(review): call_translator() already records the pair through
    # append_log(), so each translation is logged twice - confirm intent.
    append_log_async(text, translated, lang_pair)
    return translated
# Request body accepted by the POST /chat route.
class ChatRequest(BaseModel):
    # Text to translate.
    message: str
@app.post("/chat")
async def chat_api(req: ChatRequest):
    """Translate the message of a POST /chat request.

    Args:
        req: Request body carrying the text to translate in ``message``.

    Returns:
        A dict with the translation under the "translation" key.
    """
    # translate_zomi() is synchronous and blocks; calling it directly inside
    # this coroutine would stall the event loop for the whole app. Run it in
    # the default executor instead.
    loop = asyncio.get_running_loop()
    translation = await loop.run_in_executor(None, translate_zomi, req.message)
    return {"translation": translation}
def chat_ui(message: str):
    """Return the translation of *message*; used as the Gradio submit handler."""
    translated = translate_zomi(message)
    return translated
# Gradio UI: a translation box for everyone plus, when an admin password is
# configured, a password-protected button that commits the logs on demand.
with gr.Blocks() as demo:
    gr.Markdown("### Zomi Translator")
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    # Submitting the input box runs the translation and fills the output box.
    inp.submit(chat_ui, inp, out)

    # The admin section is only built when ADMIN_PASSWORD is set.
    if ADMIN_PASSWORD:
        gr.Markdown("### Admin (Protected)")
        admin_pw = gr.Textbox(
            label="Admin Password",
            type="password",
            placeholder="Enter admin password",
        )
        # Label repaired: the emoji had been stored as mis-decoded bytes.
        commit_btn = gr.Button("📦 Commit Logs Now")
        status = gr.Textbox(label="Commit Status", interactive=False)
        commit_btn.click(
            manual_commit,
            inputs=admin_pw,
            outputs=status,
        ).then(
            # Clear the password field once the commit attempt has finished.
            lambda: "",
            None,
            admin_pw,
        )
# The background log worker is started only when a Hub token is configured.
# It runs as a daemon thread, so it does not keep the process alive.
if HF_TOKEN:
    threading.Thread(
        daemon=True,
        target=async_commit_worker,
    ).start()

# Mount Gradio under the FastAPI app
app = gr.mount_gradio_app(app, demo, path="/")
if __name__ == "__main__":
    # Respect PORT env var (used by Hugging Face Spaces); default is 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=int(os.environ.get("PORT", "7860")),
    )