Spaces:
Sleeping
Sleeping
| # import gradio as gr | |
| # from fastapi import FastAPI | |
| # from fastapi.middleware.cors import CORSMiddleware | |
| # from pydantic import BaseModel | |
| # from gradio_client import Client | |
| # import uvicorn | |
| # import os | |
| # ############ logging, and committing translation ############## | |
| # from huggingface_hub import HfApi, CommitOperationAdd | |
| # import time | |
| # from datetime import datetime | |
| # from langdetect import detect | |
| # from huggingface_hub import update_dataset_card | |
| # import json | |
| # import threading | |
| # import queue | |
| # import hashlib | |
| # HF_DATASET = "Juna190825/zomi-translation-logs" | |
| # HF_TOKEN = os.getenv("HF_TOKEN") | |
| # ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD") | |
| # api = HfApi(token=HF_TOKEN) | |
| # log_queue = queue.Queue() | |
| # LOG_DIR = "/data" | |
| # BUFFER_FILE = os.path.join(LOG_DIR, "log_buffer.jsonl") | |
| # COMMIT_INTERVAL_SECONDS = 900 # every 15 minutes | |
| # def append_log(input_text: str, output_text: str): | |
| # os.makedirs(LOG_DIR, exist_ok=True) | |
| # record = { | |
| # "ts": datetime.utcnow().isoformat() + "Z", | |
| # "src_text": input_text[:500], | |
| # "tgt_text": output_text[:500], | |
| # "app": "zomi-translator", | |
| # "version": "1.0.0" | |
| # } | |
| # with open(BUFFER_FILE, "a", encoding="utf-8") as f: | |
| # f.write(json.dumps(record, ensure_ascii=False) + "\n") | |
| # def commit_logs_to_hf(manual=False): | |
| # if not HF_TOKEN or not os.path.exists(BUFFER_FILE): | |
| # return "No logs to commit." | |
| # if not os.path.exists(BUFFER_FILE) or os.path.getsize(BUFFER_FILE) < 10: | |
| # return "No new logs." | |
| # with open(BUFFER_FILE, "r", encoding="utf-8") as f: | |
| # data = f.read().strip() | |
| # if not data: | |
| # return "No new logs." | |
| # date_str = datetime.utcnow().strftime("%Y-%m-%d") | |
| # repo_path = f"logs/{date_str}.jsonl" | |
| # api.upload_file( | |
| # path_or_fileobj=BUFFER_FILE, | |
| # path_in_repo=repo_path, | |
| # repo_id=HF_DATASET, | |
| # repo_type="dataset", | |
| # commit_message="Manual log commit" if manual else "Auto daily log commit" | |
| # ) | |
| # # Clear buffer after successful commit | |
| # open(BUFFER_FILE, "w").close() | |
| # return f"Committed logs to {repo_path}" | |
| # def update_dataset_card_info(): | |
| # api = HfApi() | |
| # # Build the README.md content dynamically | |
| # readme = f""" | |
| # # Zomi Translator Logs | |
| # **License:** MIT | |
| # **Languages:** Zomi, English | |
| # Daily logs of Zomi β English translations. | |
| # """ | |
| # api.create_commit( | |
| # repo_id=HF_DATASET, | |
| # repo_type="dataset", | |
| # commit_message="Update dataset card metadata", | |
| # operations=[ | |
| # CommitOperationAdd( | |
| # path_in_repo="README.md", | |
| # path_or_fileobj=readme.encode("utf-8") | |
| # ) | |
| # ] | |
| # ) | |
| # def manual_commit(password: str): | |
| # if not ADMIN_PASSWORD: | |
| # return "β Admin password not configured." | |
| # if password != ADMIN_PASSWORD: | |
| # return "β Invalid admin password." | |
| # result = commit_logs_to_hf(manual=True) | |
| # return f"β {result}" | |
| # def append_log_async(input_text, output_text, direction): | |
| # log_queue.put({ | |
| # "ts": datetime.utcnow().isoformat() + "Z", | |
| # # "src_text_hash": hashlib.sha256(input_text.encode("utf-8")).hexdigest(), | |
| # "src_text": input_text, | |
| # "tgt_text": output_text[:500], | |
| # "direction": direction, | |
| # "app": "zomi-translator", | |
| # "version": "1.0.0" | |
| # }) | |
| # def async_commit_worker(): | |
| # buffer = [] | |
| # while True: | |
| # try: | |
| # # Collect up to 50 logs or 60 seconds | |
| # start = time.time() | |
| # while len(buffer) < 50 and (time.time() - start < 60): | |
| # try: | |
| # buffer.append(log_queue.get(timeout=1)) | |
| # except queue.Empty: | |
| # pass | |
| # if buffer: | |
| # os.makedirs(LOG_DIR, exist_ok=True) | |
| # date_str = datetime.utcnow().strftime("%Y-%m-%d") | |
| # batch_file = os.path.join(LOG_DIR, f"{date_str}.jsonl") | |
| # with open(batch_file, "a", encoding="utf-8") as f: | |
| # for record in buffer: | |
| # f.write(json.dumps(record, ensure_ascii=False) + "\n") | |
| # commit_logs_to_hf() # commits the batch file | |
| # update_dataset_card_info() | |
| # buffer.clear() | |
| # except Exception as e: | |
| # print("Async commit failed:", e) | |
| # ############################################################### | |
| # app = FastAPI() | |
| # # Enable CORS | |
| # app.add_middleware( | |
| # CORSMiddleware, | |
| # allow_origins=["*"], | |
| # allow_credentials=True, | |
| # allow_methods=["*"], | |
| # allow_headers=["*"], | |
| # ) | |
| # # Initialize client once | |
| # translator_client = Client("Chatboong/Gemini_Translator") | |
| # def call_translator(text: str): | |
| # msg = f"Translate Zomi to English, if it is English translate it to Zomi: '{text}'\n" | |
| # stream = translator_client.predict( | |
| # message=msg, | |
| # lang="English", | |
| # is_streaming=True, | |
| # api_name="/chat", | |
| # ) | |
| # output = "" | |
| # for chunk in stream: | |
| # output += str(chunk) | |
| # # Remove prefix | |
| # prefix = "Translate Zomi to English, if it is English translate it to Zomi: " | |
| # prefix2 = "Zomi pan English in tei in, English ahih leh Zomi in tei in: " | |
| # prefix3 = 'Zomi-in tei in, English ahih leh Zomi-in tei in: ' | |
| # if output.startswith(prefix): | |
| # output = output[len(prefix):].strip() | |
| # # Remove surrounding quotes | |
| # if (output.startswith('"') and output.endswith('"')) or (output.startswith("'") and output.endswith("'")): | |
| # output = output[1:-1].strip() | |
| # elif output.startswith(prefix2): | |
| # output = output[len(prefix2):].strip() | |
| # # Remove surrounding quotes | |
| # if (output.startswith('"') and output.endswith('"')) or (output.startswith("'") and output.endswith("'")): | |
| # output = output[1:-1].strip() | |
| # elif output.startswith(prefix3): | |
| # output = output[len(prefix3):].strip() | |
| # # Remove surrounding quotes | |
| # if (output.startswith('"') and output.endswith('"')) or (output.startswith("'") and output.endswith("'")): | |
| # output = output[1:-1].strip() | |
| # append_log(text, output) | |
| # return output | |
| # def detect_direction(text: str) -> str: | |
| # try: | |
| # lang = detect(text) | |
| # if lang == "en": | |
| # return "en-zomi" | |
| # else: | |
| # return "zomi-en" | |
| # except: | |
| # return "en-zomi" | |
| # def translate_zomi(text: str): | |
| # direction = detect_direction(text) | |
| # output = call_translator(text) # your existing streaming code | |
| # append_log_async(text, output, direction) | |
| # return output | |
| # class ChatRequest(BaseModel): | |
| # message: str | |
| # @app.post("/chat") | |
| # async def chat_api(req: ChatRequest): | |
| # translation = translate_zomi(req.message) | |
| # return {"translation": translation} | |
| # def chat_ui(message: str): | |
| # return translate_zomi(message) | |
| # with gr.Blocks() as demo: | |
| # gr.Markdown("### Zomi Translator") | |
| # inp = gr.Textbox(label="Input") | |
| # out = gr.Textbox(label="Output") | |
| # inp.submit(chat_ui, inp, out) | |
| # if ADMIN_PASSWORD: | |
| # gr.Markdown("### Admin (Protected)") | |
| # admin_pw = gr.Textbox( | |
| # label="Admin Password", | |
| # type="password", | |
| # placeholder="Enter admin password" | |
| # ) | |
| # commit_btn = gr.Button("π¦ Commit Logs Now") | |
| # status = gr.Textbox(label="Commit Status", interactive=False) | |
| # commit_btn.click( | |
| # manual_commit, | |
| # inputs=admin_pw, | |
| # outputs=status | |
| # ).then( | |
| # lambda: "", | |
| # None, | |
| # admin_pw | |
| # ) | |
| # if HF_TOKEN: | |
| # threading.Thread(target=async_commit_worker, daemon=True).start() | |
| # # Mount Gradio under the FastAPI app | |
| # app = gr.mount_gradio_app(app, demo, path="/") | |
| # if __name__ == "__main__": | |
| # # Respect PORT env var (used by Hugging Face Spaces) | |
| # port = int(os.getenv("PORT", "7860")) | |
| # uvicorn.run(app, host="0.0.0.0", port=port) | |
import hashlib
import hmac
import json
import os
import queue
import threading
import time
from datetime import datetime

import gradio as gr
import uvicorn
from fastapi import FastAPI
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from gradio_client import Client
from huggingface_hub import CommitOperationAdd, HfApi, RepoCard
from langdetect import detect
from pydantic import BaseModel

############ logging, and committing translation ##############
# Hub dataset repository that receives the uploaded log files.
HF_DATASET = "Juna190825/zomi-translation-logs"

# Both values come from the environment and are None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD")

# Shared Hub client, authenticated with HF_TOKEN when one is configured.
api = HfApi(token=HF_TOKEN)

# Records handed from request handlers to the background commit worker.
log_queue = queue.Queue()

# Local directory and JSONL buffer where logs are staged before upload.
LOG_DIR = "/data"
BUFFER_FILE = os.path.join(LOG_DIR, "log_buffer.jsonl")

# NOTE(review): not referenced anywhere in the visible part of this file.
COMMIT_INTERVAL_SECONDS = 15 * 60  # every 15 minutes
def append_log(input_text: str, output_text: str):
    """Append one translation record to the local JSONL buffer file.

    Args:
        input_text: Source text; only the first 500 characters are stored.
        output_text: Translated text; only the first 500 characters are stored.

    The record is written as a single JSON line to ``BUFFER_FILE``; the
    containing ``LOG_DIR`` is created on demand.
    """
    os.makedirs(LOG_DIR, exist_ok=True)
    entry = {
        "ts": f"{datetime.utcnow().isoformat()}Z",
        "src_text": input_text[:500],
        "tgt_text": output_text[:500],
        "app": "zomi-translator",
        "version": "1.0.0",
    }
    # ensure_ascii=False keeps non-ASCII text readable in the log file.
    serialized = json.dumps(entry, ensure_ascii=False)
    with open(BUFFER_FILE, "a", encoding="utf-8") as buffer_fh:
        buffer_fh.write(f"{serialized}\n")
def commit_logs_to_hf(manual=False):
    """Upload the buffered log records to the Hub dataset as a new file.

    Args:
        manual: True when the commit is requested explicitly; selects the
            ``manual_`` file-name prefix and commit message instead of
            ``auto_``.

    Returns:
        A status string: ``"No logs to commit."`` when no token is configured
        or the buffer file does not exist, ``"No new logs."`` when the buffer
        is (almost) empty, otherwise ``"Committed logs to <repo path>"``.

    Raises:
        Any exception raised by ``api.upload_file``; the buffer file is left
        unchanged in that case so the records can be retried later.
    """
    if not HF_TOKEN or not os.path.exists(BUFFER_FILE):
        return "No logs to commit."
    if os.path.getsize(BUFFER_FILE) < 10:
        return "No new logs."

    # Snapshot the exact bytes that will be uploaded.
    with open(BUFFER_FILE, "rb") as f:
        payload = f.read()
    if not payload.strip():
        return "No new logs."

    # Read the clock once so the date and the timestamp can never disagree.
    now = datetime.utcnow()
    date_str = now.strftime("%Y-%m-%d")
    # The timestamp makes the file name unique per commit.
    timestamp = now.strftime("%Y%m%d-%H%M%S")
    if manual:
        repo_path = f"logs/manual_{date_str}_{timestamp}.jsonl"
        commit_msg = f"Manual log commit {timestamp}"
    else:
        repo_path = f"logs/auto_{date_str}_{timestamp}.jsonl"
        commit_msg = f"Auto log commit {timestamp}"

    api.upload_file(
        path_or_fileobj=payload,
        path_in_repo=repo_path,
        repo_id=HF_DATASET,
        repo_type="dataset",
        commit_message=commit_msg,
    )

    # Remove only the uploaded prefix. Records appended while the upload was
    # in flight sit after it and must survive for the next commit; truncating
    # the whole file here would silently drop them.
    with open(BUFFER_FILE, "rb") as f:
        f.seek(len(payload))
        remainder = f.read()
    with open(BUFFER_FILE, "wb") as f:
        f.write(remainder)

    return f"Committed logs to {repo_path}"
def update_dataset_card_info():
    """Upload a freshly built ``README.md`` (dataset card) to the log dataset.

    The card text is static. The upload goes through the module-level ``api``
    client so it uses the same ``HF_TOKEN`` credentials as the log uploads,
    rather than a second, separately configured client.

    Raises:
        Any exception raised by ``api.upload_file``.
    """
    # Build the README.md content (plain literal: there is nothing to format).
    readme_content = """
# Zomi Translator Logs
**License:** MIT
**Languages:** Zomi, English
Daily logs of Zomi β English translations.
"""
    # Wrap the text in a RepoCard and upload its rendered content.
    card = RepoCard(readme_content)
    api.upload_file(
        path_or_fileobj=card.content.encode("utf-8"),
        path_in_repo="README.md",
        repo_id=HF_DATASET,
        repo_type="dataset",
    )
def manual_commit(password: str):
    """Commit the buffered logs on request, guarded by the admin password.

    Args:
        password: Password typed by the user; may be empty or None.

    Returns:
        A status string for display: a refusal message when no admin password
        is configured or the supplied one does not match, otherwise the
        result of ``commit_logs_to_hf(manual=True)`` with a prefix.
    """
    if not ADMIN_PASSWORD:
        return "β Admin password not configured."
    # Compare in constant time so response timing does not reveal how much of
    # the password matched. Encoding to bytes lets compare_digest accept
    # non-ASCII passwords.
    supplied = (password or "").encode("utf-8")
    expected = ADMIN_PASSWORD.encode("utf-8")
    if not hmac.compare_digest(supplied, expected):
        return "β Invalid admin password."
    result = commit_logs_to_hf(manual=True)
    return f"β {result}"
def append_log_async(input_text, output_text, direction):
    """Queue one translation record for the background commit worker.

    Args:
        input_text: Source text, stored in full.
        output_text: Translated text; only the first 500 characters are kept.
        direction: Translation direction label stored with the record.

    The record is only placed on ``log_queue``; nothing is written here.
    """
    entry = {
        "ts": f"{datetime.utcnow().isoformat()}Z",
        "src_text": input_text,
        "tgt_text": output_text[:500],
        "direction": direction,
        "app": "zomi-translator",
        "version": "1.0.0",
    }
    log_queue.put(entry)
def async_commit_worker():
    """Drain ``log_queue`` forever, persisting batches and triggering commits.

    Each round collects up to 50 queued records or waits up to 60 seconds,
    appends the batch to a per-day JSONL file under ``LOG_DIR``, then calls
    ``commit_logs_to_hf()`` and ``update_dataset_card_info()``. Failures are
    printed and the loop keeps running; the function never returns.
    """
    buffer = []
    while True:
        try:
            # Collect up to 50 logs or 60 seconds
            start = time.time()
            while len(buffer) < 50 and (time.time() - start < 60):
                try:
                    buffer.append(log_queue.get(timeout=1))
                except queue.Empty:
                    pass
            if not buffer:
                continue

            os.makedirs(LOG_DIR, exist_ok=True)
            date_str = datetime.utcnow().strftime("%Y-%m-%d")
            batch_file = os.path.join(LOG_DIR, f"{date_str}.jsonl")
            with open(batch_file, "a", encoding="utf-8") as f:
                f.writelines(
                    json.dumps(record, ensure_ascii=False) + "\n"
                    for record in buffer
                )
            # The batch is on disk now. Forget it before the network calls so
            # a failed commit cannot cause the same records to be appended to
            # the batch file again on the next round.
            buffer.clear()

            # NOTE(review): commit_logs_to_hf() uploads BUFFER_FILE, not the
            # per-day batch file written above; confirm whether the batch
            # file is meant to be uploaded as well.
            commit_logs_to_hf()
            update_dataset_card_info()
        except Exception as e:
            print("Async commit failed:", e)
            # Brief back-off so a persistent failure with a full buffer
            # cannot turn this loop into a busy spin.
            time.sleep(1)
| ############################################################### | |
app = FastAPI()

# Enable CORS
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive; confirm this is intended for the deployed API.
_cors_options = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)

# Initialize client once; every translation request reuses it.
translator_client = Client("Chatboong/Gemini_Translator")
def call_translator(text: str):
    """Send ``text`` to the remote translator and return the cleaned reply.

    The reply is assembled from the pieces returned by the client. If it
    starts with one of the known echoed instruction prefixes, that prefix is
    removed and one pair of matching surrounding quotes is stripped. The
    input/output pair is then recorded via ``append_log``.

    Args:
        text: Text to translate.

    Returns:
        The translator's reply after prefix and quote clean-up.
    """
    prefix = "Translate Zomi to English, if it is English translate it to Zomi: "
    prefix2 = "Zomi pan English in tei in, English ahih leh Zomi in tei in: "
    prefix3 = 'Zomi-in tei in, English ahih leh Zomi-in tei in: '

    # The prompt is the first prefix followed by the quoted input text.
    msg = f"{prefix}'{text}'\n"
    stream = translator_client.predict(
        message=msg,
        lang="English",
        is_streaming=True,
        api_name="/chat",
    )
    output = "".join(str(chunk) for chunk in stream)

    # Only the first matching prefix is handled, mirroring an if/elif chain.
    for echoed in (prefix, prefix2, prefix3):
        if not output.startswith(echoed):
            continue
        output = output[len(echoed):].strip()
        # Remove surrounding quotes when both ends carry the same quote mark.
        opening = output[:1]
        if opening in ('"', "'") and output.endswith(opening):
            output = output[1:-1].strip()
        break

    append_log(text, output)
    return output
def detect_direction(text: str) -> str:
    """Guess the translation direction from the language of ``text``.

    Args:
        text: Input text to classify.

    Returns:
        ``"en-zomi"`` when the detector reports English or when detection
        fails for any reason; ``"zomi-en"`` for every other detected language.
    """
    try:
        lang = detect(text)
    except Exception:
        # Detection is best-effort: fall back to the default direction, but
        # do not swallow KeyboardInterrupt/SystemExit as a bare except would.
        return "en-zomi"
    return "en-zomi" if lang == "en" else "zomi-en"
def translate_zomi(text: str):
    """Translate ``text`` and queue a log record for the result.

    The direction is detected first, then the translator is called, and
    finally the input, output and direction are handed to
    ``append_log_async``.

    Args:
        text: Text to translate.

    Returns:
        The translated text as returned by ``call_translator``.
    """
    detected_direction = detect_direction(text)
    translated = call_translator(text)
    append_log_async(text, translated, detected_direction)
    return translated
class ChatRequest(BaseModel):
    """Request body accepted by ``chat_api``: a single text message."""

    # Text to translate.
    message: str
@app.post("/chat")
async def chat_api(req: ChatRequest):
    """HTTP endpoint: translate ``req.message`` and return it as JSON.

    Registered as ``POST /chat``; without the route decorator the function is
    never reachable over HTTP.

    Args:
        req: Parsed request body carrying the text to translate.

    Returns:
        ``{"translation": <translated text>}``.
    """
    # translate_zomi blocks on a remote call, so run it in the worker thread
    # pool instead of stalling the event loop for other requests.
    translation = await run_in_threadpool(translate_zomi, req.message)
    return {"translation": translation}
def chat_ui(message: str):
    """Gradio callback: translate the submitted ``message`` and return it."""
    translated = translate_zomi(message)
    return translated
# Gradio front end: a translator box, plus an admin section that is only
# built when an admin password is configured.
with gr.Blocks() as demo:
    gr.Markdown("### Zomi Translator")
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    # Submitting the input box runs the translation and fills the output.
    inp.submit(fn=chat_ui, inputs=inp, outputs=out)

    if ADMIN_PASSWORD:
        gr.Markdown("### Admin (Protected)")
        admin_pw = gr.Textbox(
            label="Admin Password",
            type="password",
            placeholder="Enter admin password",
        )
        commit_btn = gr.Button("π¦ Commit Logs Now")
        status = gr.Textbox(label="Commit Status", interactive=False)

        # Run the guarded commit and show its status message.
        commit_event = commit_btn.click(
            fn=manual_commit,
            inputs=admin_pw,
            outputs=status,
        )
        # Afterwards, blank the password field.
        commit_event.then(fn=lambda: "", inputs=None, outputs=admin_pw)
# Start the background commit worker only when a Hub token is configured.
if HF_TOKEN:
    _commit_thread = threading.Thread(target=async_commit_worker, daemon=True)
    _commit_thread.start()

# Mount Gradio under the FastAPI app
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    # Respect PORT env var (used by Hugging Face Spaces)
    port = int(os.environ.get("PORT", "7860"))
    uvicorn.run(app, host="0.0.0.0", port=port)