"""Zomi <-> English translator service.

Exposes a FastAPI ``POST /chat`` endpoint and a Gradio UI (mounted at ``/``).
Both forward text to the remote ``Chatboong/Gemini_Translator`` Gradio app,
record each translation in local JSONL files under ``LOG_DIR`` and, when
``HF_TOKEN`` is set, upload the buffered log file to the ``HF_DATASET``
dataset repository from a background thread.
"""

import hashlib
import hmac
import json
import os
import queue
import threading
import time
from datetime import datetime, timezone

import gradio as gr
import uvicorn
from fastapi import FastAPI
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from gradio_client import Client
from huggingface_hub import CommitOperationAdd, HfApi, RepoCard
from langdetect import detect
from pydantic import BaseModel

############ logging, and committing translation ##############

HF_DATASET = "Juna190825/zomi-translation-logs"
HF_TOKEN = os.getenv("HF_TOKEN")
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD")

api = HfApi(token=HF_TOKEN)
log_queue = queue.Queue()

LOG_DIR = "/data"
BUFFER_FILE = os.path.join(LOG_DIR, "log_buffer.jsonl")
# NOTE(review): not referenced anywhere in this module; the background worker
# batches on its own 50-record / 60-second limits instead.
COMMIT_INTERVAL_SECONDS = 900  # every 15 minutes

# Limits used by async_commit_worker when collecting a batch from log_queue.
_BATCH_MAX_RECORDS = 50
_BATCH_MAX_WAIT_SECONDS = 60
# Pause after a failed worker iteration so a persistent error cannot spin.
_WORKER_ERROR_BACKOFF_SECONDS = 5

# Guards every read/write of BUFFER_FILE (request threads append to it while
# the worker thread or an admin-triggered commit reads and trims it).
_buffer_lock = threading.Lock()
# Serialises whole commits so two uploads never snapshot the same records.
_commit_lock = threading.Lock()

# Static README body pushed to the dataset repo by update_dataset_card_info.
_DATASET_CARD = (
    "\n"
    "# Zomi Translator Logs\n"
    "\n"
    "**License:** MIT  \n"
    "**Languages:** Zomi, English  \n"
    "\n"
    "Daily logs of Zomi ↔ English translations.\n"
)

# Prompt texts the remote model is known to echo back in front of its answer.
_PROMPT_ECHO_PREFIXES = (
    "Translate Zomi to English, if it is English translate it to Zomi: ",
    "Zomi pan English in tei in, English ahih leh Zomi in tei in: ",
    "Zomi-in tei in, English ahih leh Zomi-in tei in: ",
)


def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


def _utc_iso_z() -> str:
    """Return the current UTC time as ISO-8601 text with a ``Z`` suffix."""
    # Drop tzinfo before formatting so the text ends in "Z" rather than
    # "+00:00Z", matching the format of previously written records.
    return _utc_now().replace(tzinfo=None).isoformat() + "Z"


def append_log(input_text: str, output_text: str) -> None:
    """Append one translation record to ``BUFFER_FILE`` as a JSON line.

    Args:
        input_text: Source text; only the first 500 characters are stored.
        output_text: Translated text; only the first 500 characters are stored.
    """
    os.makedirs(LOG_DIR, exist_ok=True)
    record = {
        "ts": _utc_iso_z(),
        "src_text": input_text[:500],
        "tgt_text": output_text[:500],
        "app": "zomi-translator",
        "version": "1.0.0",
    }
    line = json.dumps(record, ensure_ascii=False) + "\n"
    with _buffer_lock:
        with open(BUFFER_FILE, "a", encoding="utf-8") as f:
            f.write(line)


def commit_logs_to_hf(manual=False):
    """Upload the buffered log records to the dataset repo and trim the buffer.

    A byte snapshot of ``BUFFER_FILE`` is uploaded, and afterwards only that
    snapshot is removed from the front of the file, so records appended while
    the upload was in flight are kept for the next commit.

    Args:
        manual: ``True`` for an admin-triggered commit; selects the
            ``manual_``/``auto_`` file-name prefix and commit message.

    Returns:
        A human-readable status string.

    Raises:
        Exception: Whatever ``HfApi.upload_file`` raises on failure; the
            buffer is left untouched in that case.
    """
    if not HF_TOKEN:
        return "No logs to commit."

    with _commit_lock:
        with _buffer_lock:
            try:
                with open(BUFFER_FILE, "rb") as f:
                    snapshot = f.read()
            except FileNotFoundError:
                return "No logs to commit."

        if len(snapshot) < 10 or not snapshot.strip():
            return "No new logs."

        # One clock reading so the date and timestamp parts always agree.
        now = _utc_now()
        date_str = now.strftime("%Y-%m-%d")
        timestamp = now.strftime("%Y%m%d-%H%M%S")
        kind = "manual" if manual else "auto"
        # The timestamp keeps every uploaded file name unique.
        repo_path = f"logs/{kind}_{date_str}_{timestamp}.jsonl"
        commit_msg = f"{kind.capitalize()} log commit {timestamp}"

        api.upload_file(
            path_or_fileobj=snapshot,
            path_in_repo=repo_path,
            repo_id=HF_DATASET,
            repo_type="dataset",
            commit_message=commit_msg,
        )

        # The buffer is append-only and commits are serialised, so the
        # uploaded snapshot is still the exact prefix of the file.
        with _buffer_lock:
            with open(BUFFER_FILE, "rb") as f:
                current = f.read()
            tmp_path = BUFFER_FILE + ".tmp"
            with open(tmp_path, "wb") as f:
                f.write(current[len(snapshot):])
            os.replace(tmp_path, BUFFER_FILE)

    return f"Committed logs to {repo_path}"


def update_dataset_card_info() -> None:
    """Upload the static README (dataset card) to the dataset repository."""
    card = RepoCard(_DATASET_CARD)
    # Use the module-level client so the configured token is applied.
    api.upload_file(
        path_or_fileobj=card.content.encode("utf-8"),
        path_in_repo="README.md",
        repo_id=HF_DATASET,
        repo_type="dataset",
        commit_message="Update dataset card",
    )


def manual_commit(password: str) -> str:
    """Commit buffered logs on behalf of the admin UI.

    Args:
        password: Password typed into the admin form.

    Returns:
        A status message prefixed with a check mark or cross mark.
    """
    if not ADMIN_PASSWORD:
        return "❌ Admin password not configured."

    supplied = (password or "").encode("utf-8")
    expected = ADMIN_PASSWORD.encode("utf-8")
    # Constant-time comparison; bytes are used because compare_digest
    # rejects non-ASCII str arguments.
    if not hmac.compare_digest(supplied, expected):
        return "❌ Invalid admin password."

    try:
        result = commit_logs_to_hf(manual=True)
    except Exception as e:
        # Report the failure in the status box instead of a raw UI error.
        return f"❌ Commit failed: {e}"
    return f"✅ {result}"


def append_log_async(input_text, output_text, direction) -> None:
    """Queue a translation record for the background commit worker.

    Args:
        input_text: Source text, stored in full.
        output_text: Translated text; only the first 500 characters are stored.
        direction: Direction label such as ``"en-zomi"`` or ``"zomi-en"``.
    """
    log_queue.put({
        "ts": _utc_iso_z(),
        "src_text": input_text,
        "tgt_text": output_text[:500],
        "direction": direction,
        "app": "zomi-translator",
        "version": "1.0.0",
    })


def async_commit_worker() -> None:
    """Run forever: batch queued records to disk, then commit the buffer.

    Each iteration collects up to 50 records or waits up to 60 seconds,
    appends the batch to ``LOG_DIR/<date>.jsonl``, then calls
    ``commit_logs_to_hf`` and ``update_dataset_card_info``.

    NOTE(review): the daily batch file written here is never uploaded;
    ``commit_logs_to_hf`` uploads ``BUFFER_FILE`` (filled by ``append_log``).
    Confirm whether the batch file is meant to be a local-only archive.
    """
    pending = []
    while True:
        try:
            deadline = time.monotonic() + _BATCH_MAX_WAIT_SECONDS
            while len(pending) < _BATCH_MAX_RECORDS and time.monotonic() < deadline:
                try:
                    pending.append(log_queue.get(timeout=1))
                except queue.Empty:
                    pass

            if not pending:
                continue

            os.makedirs(LOG_DIR, exist_ok=True)
            date_str = _utc_now().strftime("%Y-%m-%d")
            batch_file = os.path.join(LOG_DIR, f"{date_str}.jsonl")
            with open(batch_file, "a", encoding="utf-8") as f:
                f.writelines(
                    json.dumps(record, ensure_ascii=False) + "\n"
                    for record in pending
                )
            # Records are on disk now; drop them before the network calls so
            # a failed upload cannot write them to the batch file again.
            pending.clear()

            commit_logs_to_hf()
            update_dataset_card_info()
        except Exception as e:
            print("Async commit failed:", e)
            time.sleep(_WORKER_ERROR_BACKOFF_SECONDS)


###############################################################

app = FastAPI()

# Enable CORS
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive; restrict the origins if cookies or auth headers are ever used.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize client once
translator_client = Client("Chatboong/Gemini_Translator")


def _strip_prompt_echo(output: str) -> str:
    """Remove an echoed prompt prefix and one pair of surrounding quotes.

    Quotes are stripped only when one of the known prefixes was present;
    output that does not start with a known prefix is returned unchanged.
    """
    for prefix in _PROMPT_ECHO_PREFIXES:
        if output.startswith(prefix):
            cleaned = output[len(prefix):].strip()
            is_quoted = (
                len(cleaned) >= 2
                and cleaned[0] == cleaned[-1]
                and cleaned[0] in "\"'"
            )
            if is_quoted:
                cleaned = cleaned[1:-1].strip()
            return cleaned
    return output


def call_translator(text: str) -> str:
    """Send ``text`` to the remote translator and return the cleaned reply.

    The reply is also written to ``BUFFER_FILE`` through ``append_log``.

    Args:
        text: Text in Zomi or English.

    Returns:
        The model output with any echoed prompt prefix and surrounding
        quotes removed.
    """
    msg = f"Translate Zomi to English, if it is English translate it to Zomi: '{text}'\n"
    stream = translator_client.predict(
        message=msg,
        lang="English",
        is_streaming=True,
        api_name="/chat",
    )
    output = _strip_prompt_echo("".join(str(chunk) for chunk in stream))
    append_log(text, output)
    return output


def detect_direction(text: str) -> str:
    """Return ``"en-zomi"`` if ``text`` is detected as English, else ``"zomi-en"``.

    Falls back to ``"en-zomi"`` when language detection raises.
    """
    try:
        lang = detect(text)
    except Exception:
        return "en-zomi"
    return "en-zomi" if lang == "en" else "zomi-en"


def translate_zomi(text: str) -> str:
    """Translate ``text`` and queue a log record that includes the direction.

    NOTE(review): ``call_translator`` already logs the translation through
    ``append_log``, so every request produces two records (buffer file and
    queue). Confirm that this is intended.
    """
    direction = detect_direction(text)
    output = call_translator(text)
    append_log_async(text, output, direction)
    return output


class ChatRequest(BaseModel):
    """Request body for ``POST /chat``."""

    message: str


@app.post("/chat")
async def chat_api(req: ChatRequest):
    """Translate ``req.message`` and return ``{"translation": ...}``."""
    # translate_zomi does blocking network and file I/O; run it in the thread
    # pool so the event loop stays free for other requests.
    translation = await run_in_threadpool(translate_zomi, req.message)
    return {"translation": translation}


def chat_ui(message: str) -> str:
    """Gradio callback: translate the submitted message."""
    return translate_zomi(message)


with gr.Blocks() as demo:
    gr.Markdown("### Zomi Translator")
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    inp.submit(chat_ui, inp, out)

    # The admin panel is only rendered when a password is configured.
    if ADMIN_PASSWORD:
        gr.Markdown("### Admin (Protected)")
        admin_pw = gr.Textbox(
            label="Admin Password",
            type="password",
            placeholder="Enter admin password",
        )
        commit_btn = gr.Button("📦 Commit Logs Now")
        status = gr.Textbox(label="Commit Status", interactive=False)
        commit_btn.click(
            manual_commit,
            inputs=admin_pw,
            outputs=status,
        ).then(
            # Clear the password field after each attempt.
            lambda: "",
            None,
            admin_pw,
        )

# Background uploads only make sense when a token is available.
if HF_TOKEN:
    threading.Thread(target=async_commit_worker, daemon=True).start()

# Mount Gradio under the FastAPI app
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    # Respect PORT env var (used by Hugging Face Spaces)
    port = int(os.getenv("PORT", "7860"))
    uvicorn.run(app, host="0.0.0.0", port=port)