| # import streamlit as st | |
| # import pandas as pd | |
| # import time | |
| # from datetime import datetime | |
| # import os | |
| # import json | |
| # # Try to import Google Sheets libraries | |
| # try: | |
| # import gspread | |
| # from oauth2client.service_account import ServiceAccountCredentials | |
| # GSHEETS_AVAILABLE = True | |
| # except ImportError: | |
| # GSHEETS_AVAILABLE = False | |
| # # --- CONFIGURATION --- | |
| # st.set_page_config( | |
| # page_title="Tagin Feedback Loop", | |
| # page_icon="๐", | |
| # layout="centered", | |
| # initial_sidebar_state="expanded" | |
| # ) | |
| # # --- SESSION STATE INITIALIZATION --- | |
| # if "translations_list" not in st.session_state: | |
| # st.session_state.translations_list = [] | |
| # if "source_text" not in st.session_state: | |
| # st.session_state.source_text = "" | |
| # # --- MODEL LOADING LOGIC --- | |
| # @st.cache_resource | |
| # def load_model(model_path): | |
| # """ | |
| # Loads the mBART model and tokenizer from a local directory or HuggingFace Hub. | |
| # """ | |
| # try: | |
| # from transformers import MBartForConditionalGeneration, MBart50TokenizerFast | |
| # import torch | |
| # tokenizer = MBart50TokenizerFast.from_pretrained(model_path) | |
| # model = MBartForConditionalGeneration.from_pretrained(model_path) | |
| # return tokenizer, model, None | |
| # except Exception as e: | |
| # return None, None, str(e) | |
| # def perform_translation_beam(text, source_lang, target_lang, model, tokenizer, num_beams=5): | |
| # """ | |
| # Translates text returning top N hypotheses using beam search. | |
| # """ | |
| # if not text: | |
| # return [] | |
| # lang_map = { | |
| # "English": "en_XX", | |
| # "Tagin": "<tgj_IN>" | |
| # } | |
| # src_code = lang_map.get(source_lang) | |
| # tgt_code = lang_map.get(target_lang) | |
| # try: | |
| # tokenizer.src_lang = src_code | |
| # encoded_input = tokenizer(text, return_tensors="pt") | |
| # if tgt_code in tokenizer.lang_code_to_id: | |
| # forced_bos_id = tokenizer.lang_code_to_id[tgt_code] | |
| # else: | |
| # forced_bos_id = tokenizer.convert_tokens_to_ids(tgt_code) | |
| # generated_tokens = model.generate( | |
| # **encoded_input, | |
| # forced_bos_token_id=forced_bos_id, | |
| # num_beams=num_beams, | |
| # num_return_sequences=num_beams, | |
| # max_length=128 | |
| # ) | |
| # translations = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True) | |
| # return translations | |
| # except Exception as e: | |
| # st.error(f"Translation Error: {str(e)}") | |
| # return [] | |
| # def save_to_gsheet(data_row, creds_dict, sheet_name): | |
| # """Saves data to Google Sheets.""" | |
| # if not GSHEETS_AVAILABLE: | |
| # return False, "Libraries 'gspread' and 'oauth2client' not installed." | |
| # try: | |
| # # Define scope | |
| # scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive'] | |
| # # Authenticate using the dictionary (from secrets or file) | |
| # creds = ServiceAccountCredentials.from_json_keyfile_dict(creds_dict, scope) | |
| # client = gspread.authorize(creds) | |
| # # Open the sheet | |
| # try: | |
| # sheet = client.open(sheet_name).sheet1 | |
| # except gspread.SpreadsheetNotFound: | |
| # return False, f"Spreadsheet '{sheet_name}' not found. Please share it with the service account email." | |
| # # Check if header exists (simple check: is cell A1 empty?) | |
| # if not sheet.cell(1, 1).value: | |
| # sheet.append_row(['timestamp', 'source_lang', 'target_lang', 'source_text', 'corrected_translation']) | |
| # # Append data | |
| # sheet.append_row(data_row) | |
| # return True, f"Saved to Google Sheet '{sheet_name}'" | |
| # except Exception as e: | |
| # return False, str(e) | |
| # # --- SIDEBAR --- | |
| # with st.sidebar: | |
| # st.title("โ๏ธ Configuration") | |
| # # Fixed Model Path | |
| # st.markdown("**Active Model:**") | |
| # model_path_input = "Repleeka/mBART-tgj-final" | |
| # st.code(model_path_input, language=None) | |
| # st.divider() | |
| # st.subheader("๐พ Storage Settings") | |
| # st.caption("All corrections are saved to Google Sheets.") | |
| # gsheet_creds = None | |
| # # Check dependencies | |
| # if not GSHEETS_AVAILABLE: | |
| # st.error("โ ๏ธ Install gspread: `pip install gspread oauth2client`") | |
| # # Fixed Google Sheet Name | |
| # gsheet_name = "GinLish_Corpus_BOT" | |
| # st.markdown("**Target Database:**") | |
| # st.info(f"๐ {gsheet_name}") | |
| # # SECURITY UPDATE: Check for Secrets first (HuggingFace Spaces / Streamlit Cloud) | |
| # # We look for a secret named "GSHEET_CREDENTIALS" containing the JSON string | |
| # if "GSHEET_CREDENTIALS" in os.environ: | |
| # try: | |
| # gsheet_creds = json.loads(os.environ["GSHEET_CREDENTIALS"]) | |
| # st.success("โ Credentials loaded from Environment Secrets") | |
| # except Exception as e: | |
| # st.error(f"Error loading secrets: {e}") | |
| # elif "gcp_service_account" in st.secrets: | |
| # # Support for Streamlit Cloud native secrets | |
| # gsheet_creds = st.secrets["gcp_service_account"] | |
| # st.success("โ Credentials loaded from Streamlit Secrets") | |
| # else: | |
| # # Fallback to file uploader for local testing | |
| # uploaded_file = st.file_uploader("Service Account JSON", type=['json'], help="Upload for local testing. In prod, use Secrets.") | |
| # if uploaded_file is not None: | |
| # try: | |
| # gsheet_creds = json.load(uploaded_file) | |
| # st.success("Credentials loaded from file!") | |
| # except: | |
| # st.error("Invalid JSON file") | |
| # st.divider() | |
| # # --- MAIN INTERFACE --- | |
| # st.title("โ๏ธ English-to-Tagin Translator & Corrector") | |
| # st.markdown("Generate multiple hypotheses, choose the best one, and save it for retraining.") | |
| # # Load Model | |
| # tokenizer, model, error_msg = load_model(model_path_input) | |
| # if error_msg: | |
| # st.error(f"โ Model Error: {error_msg}") | |
| # else: | |
| # # 1. Input Section | |
| # st.subheader("Source Text") | |
| # col_lang1, col_lang2 = st.columns(2) | |
| # with col_lang1: | |
| # source_lang = st.selectbox("Source", ["English", "Tagin"]) | |
| # with col_lang2: | |
| # target_lang = st.selectbox("Target", ["English", "Tagin"], index=1 if source_lang=="English" else 0) | |
| # input_text = st.text_area( | |
| # "Input", | |
| # height=100, | |
| # label_visibility="collapsed", | |
| # placeholder="Enter text to translate...", | |
| # key="main_input" | |
| # ) | |
| # if st.button("Translate with Beam Search ๐", type="primary", use_container_width=True): | |
| # if input_text: | |
| # with st.spinner("Generating top 5 hypotheses..."): | |
| # results = perform_translation_beam(input_text, source_lang, target_lang, model, tokenizer) | |
| # st.session_state.translations_list = results | |
| # st.session_state.source_text = input_text # Lock in source text | |
| # else: | |
| # st.warning("Please enter some text.") | |
| # st.divider() | |
| # # 2. Results & Selection Section | |
| # if st.session_state.translations_list: | |
| # st.subheader("Select Best Translation") | |
| # options = st.session_state.translations_list | |
| # radio_options = [f"{i+1}. {text}" for i, text in enumerate(options)] | |
| # selected_option_str = st.radio( | |
| # "Top 5 Hypotheses (AI Suggestions):", | |
| # options=radio_options, | |
| # index=0 | |
| # ) | |
| # selected_index = radio_options.index(selected_option_str) | |
| # final_candidate = options[selected_index] | |
| # st.markdown("#### Review & Edit Final Output") | |
| # st.caption("If none of the above are perfect, edit the text below before saving.") | |
| # final_correction = st.text_area("Final Output", value=final_candidate, height=100) | |
| # col_save, col_status = st.columns([1, 2]) | |
| # with col_save: | |
| # if st.button("๐พ Save to Dataset", type="primary"): | |
| # # Prepare Data Row | |
| # timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
| # # Simplified language codes for storage | |
| # s_code = "en_XX" if source_lang == "English" else "tgj_IN" | |
| # t_code = "tgj_IN" if target_lang == "Tagin" else "en_XX" | |
| # data_row = [timestamp, s_code, t_code, st.session_state.source_text, final_correction] | |
| # # Execute Save Strategy | |
| # success = False | |
| # msg = "" | |
| # if gsheet_creds and gsheet_name: | |
| # with st.spinner("Connecting to Google Sheets..."): | |
| # success, msg = save_to_gsheet(data_row, gsheet_creds, gsheet_name) | |
| # else: | |
| # msg = "Missing Credentials or Sheet Name." | |
| # if success: | |
| # st.success(f"Saved! ({msg})") | |
| # time.sleep(1) | |
| # st.rerun() | |
| # else: | |
| # st.error(f"Save Failed: {msg}") | |
| # with col_status: | |
| # st.caption(f"Saving to Google Sheet: `{gsheet_name}`") | |
| # elif input_text: | |
| # st.info("Hit 'Translate' to see suggestions.") | |
| # # Create some space between main sidebar content and footer | |
| # st.sidebar.markdown("<br>" * 5, unsafe_allow_html=True) | |
| # st.sidebar.markdown("---") | |
| # st.sidebar.caption("Made with โค๏ธ by Tungon Dugi") | |
| # st.sidebar.caption("Contact: tungondugi@gmail.com") | |
| # # Or using columns in sidebar: | |
| # col1, col2 = st.sidebar.columns(2) | |
| # with col1: | |
| # st.caption("ยฉ 2026") | |
| # with col2: | |
| # st.caption("v0.1.1") | |
| import streamlit as st | |
| import pandas as pd | |
| import time | |
| from datetime import datetime | |
| import os | |
| import json | |
# Google Sheets support is optional: when gspread/oauth2client are not
# installed the app still runs, but saving corrections is disabled.
try:
    import gspread
    from oauth2client.service_account import ServiceAccountCredentials
except ImportError:
    GSHEETS_AVAILABLE = False
else:
    GSHEETS_AVAILABLE = True
# --- CONFIGURATION ---
# Page-level Streamlit settings; set_page_config must be the first st.* call.
_PAGE_CONFIG = {
    "page_title": "Tagin Feedback Loop",
    "page_icon": "๐",
    "layout": "centered",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_CONFIG)
# --- SESSION STATE INITIALIZATION ---
# Seed defaults once per session so later reads never hit a missing key.
for _key, _default in (("translations_list", []), ("source_text", "")):
    if _key not in st.session_state:
        st.session_state[_key] = _default
# --- MODEL LOADING LOGIC ---
@st.cache_resource
def load_model(model_path):
    """
    Load the mBART model and tokenizer from a local directory or the
    HuggingFace Hub.

    Decorated with ``st.cache_resource`` so the (large) model is loaded once
    per server process instead of being re-loaded on every Streamlit rerun —
    without it each widget interaction would trigger a full model load.

    Args:
        model_path: Local path or Hub repo id of the fine-tuned checkpoint.

    Returns:
        ``(tokenizer, model, None)`` on success, or ``(None, None, error_message)``
        when loading fails for any reason (missing deps, bad path, download error).
    """
    try:
        # Imported lazily so the app can still render a readable error when
        # transformers/torch are not installed.
        from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
        import torch
        tokenizer = MBart50TokenizerFast.from_pretrained(model_path)
        model = MBartForConditionalGeneration.from_pretrained(model_path)
        return tokenizer, model, None
    except Exception as e:
        return None, None, str(e)
def perform_translation_beam(text, source_lang, target_lang, model, tokenizer, num_beams=5):
    """
    Translate *text* and return the top ``num_beams`` beam-search hypotheses.

    Args:
        text: Source sentence; an empty/falsy value short-circuits to ``[]``.
        source_lang / target_lang: Display names ("English" or "Tagin"),
            mapped internally to the model's language codes.
        model / tokenizer: The loaded mBART model and its tokenizer.
        num_beams: Beam width; also the number of hypotheses returned.

    Returns:
        A list of decoded translation strings, or ``[]`` on error (the error
        is surfaced to the UI via ``st.error``).
    """
    if not text:
        return []
    # NOTE(review): "Tagin" maps to the bracketed token "<tgj_IN>" while
    # storage elsewhere uses plain "tgj_IN" — presumably this matches the
    # fine-tuned tokenizer's special token; confirm against the checkpoint.
    codes = {"English": "en_XX", "Tagin": "<tgj_IN>"}
    src_code = codes.get(source_lang)
    tgt_code = codes.get(target_lang)
    try:
        tokenizer.src_lang = src_code
        model_inputs = tokenizer(text, return_tensors="pt")
        # Prefer the tokenizer's language-code table; fall back to a plain
        # token lookup for custom codes the table does not know about.
        bos_table = tokenizer.lang_code_to_id
        if tgt_code in bos_table:
            forced_bos = bos_table[tgt_code]
        else:
            forced_bos = tokenizer.convert_tokens_to_ids(tgt_code)
        outputs = model.generate(
            **model_inputs,
            forced_bos_token_id=forced_bos,
            num_beams=num_beams,
            num_return_sequences=num_beams,
            max_length=128,
        )
        return tokenizer.batch_decode(outputs, skip_special_tokens=True)
    except Exception as e:
        st.error(f"Translation Error: {str(e)}")
        return []
def save_to_gsheet(data_row, creds_dict, sheet_name):
    """
    Append *data_row* to the first worksheet of the spreadsheet *sheet_name*.

    Writes a header row first when cell A1 is empty (i.e. a fresh sheet).

    Args:
        data_row: List of cell values to append.
        creds_dict: Service-account credentials as a parsed JSON dict.
        sheet_name: Title of the target Google Spreadsheet.

    Returns:
        ``(True, message)`` on success, ``(False, error_message)`` otherwise.
    """
    if not GSHEETS_AVAILABLE:
        return False, "Libraries 'gspread' and 'oauth2client' not installed."
    try:
        auth_scope = [
            'https://spreadsheets.google.com/feeds',
            'https://www.googleapis.com/auth/drive',
        ]
        credentials = ServiceAccountCredentials.from_json_keyfile_dict(creds_dict, auth_scope)
        gs_client = gspread.authorize(credentials)
        try:
            worksheet = gs_client.open(sheet_name).sheet1
        except gspread.SpreadsheetNotFound:
            return False, f"Spreadsheet '{sheet_name}' not found."
        # Cell A1 empty means the sheet has never been written: add the header.
        if not worksheet.cell(1, 1).value:
            worksheet.append_row(['timestamp', 'source_lang', 'target_lang', 'source_text', 'corrected_translation'])
        worksheet.append_row(data_row)
        return True, f"Saved to Google Sheet '{sheet_name}'"
    except Exception as e:
        return False, str(e)
# --- SIDEBAR ---
with st.sidebar:
    st.title("โ๏ธ Configuration")
    st.markdown("**Active Model:**")
    # Fixed model checkpoint, displayed read-only so users know what is running.
    model_path_input = "Repleeka/mBART-tgj-final"
    st.code(model_path_input, language=None)
    st.divider()
    st.subheader("๐พ Storage Settings")
    gsheet_creds = None
    if not GSHEETS_AVAILABLE:
        st.error("โ ๏ธ Install dependencies: `pip install gspread oauth2client`")
    gsheet_name = "GinLish_Corpus_BOT"
    st.info(f"๐ {gsheet_name}")
    # Credential resolution order: env-var secret -> Streamlit secrets ->
    # manual file upload (local testing). Parsing is guarded so a malformed
    # secret or file disables saving instead of crashing the whole app.
    if "GSHEET_CREDENTIALS" in os.environ:
        try:
            gsheet_creds = json.loads(os.environ["GSHEET_CREDENTIALS"])
            st.success("โ Credentials loaded (Env)")
        except ValueError as e:
            st.error(f"Invalid GSHEET_CREDENTIALS secret: {e}")
    elif "gcp_service_account" in st.secrets:
        gsheet_creds = st.secrets["gcp_service_account"]
        st.success("โ Credentials loaded (Secrets)")
    else:
        uploaded_file = st.file_uploader("Service Account JSON", type=['json'])
        if uploaded_file:
            try:
                gsheet_creds = json.load(uploaded_file)
            except ValueError:
                st.error("Invalid JSON file")
# --- MAIN INTERFACE ---
st.title("โ๏ธ English-to-Tagin Translator")
tokenizer, model, error_msg = load_model(model_path_input)
if error_msg:
    st.error(f"โ Model Error: {error_msg}")
else:
    st.subheader("Source Text")
    # Use a form to allow 'Enter' key submission
    with st.form("translation_form", clear_on_submit=False):
        col_lang1, col_lang2 = st.columns(2)
        with col_lang1:
            source_lang = st.selectbox("Source", ["English", "Tagin"])
        with col_lang2:
            # Default the target to the "other" language of the pair.
            target_lang = st.selectbox("Target", ["English", "Tagin"], index=1 if source_lang=="English" else 0)
        input_text = st.text_area(
            "Input",
            height=100,
            label_visibility="collapsed",
            placeholder="Enter text and press Enter (or click below) to translate...",
            key="main_input"
        )
        submit_button = st.form_submit_button("Translate with Beam Search ๐", type="primary", use_container_width=True)
    # Process translation when button is clicked OR Enter is pressed
    if submit_button:
        if input_text:
            # Auto-log raw input
            if gsheet_creds and gsheet_name:
                try:
                    log_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    log_s_code = "en_XX" if source_lang == "English" else "tgj_IN"
                    log_t_code = "tgj_IN" if target_lang == "Tagin" else "en_XX"
                    log_row = [log_timestamp, log_s_code, log_t_code, input_text, "[RAW_INPUT]"]
                    save_to_gsheet(log_row, gsheet_creds, gsheet_name)
                except Exception:
                    # Best-effort logging: never block translation on a
                    # Sheets failure. Narrowed from a bare `except:` so that
                    # SystemExit/KeyboardInterrupt (and Streamlit's own
                    # control-flow exceptions) are not swallowed here.
                    pass
            with st.spinner("Generating hypotheses..."):
                results = perform_translation_beam(input_text, source_lang, target_lang, model, tokenizer)
                st.session_state.translations_list = results
                # Lock in the source text so later saves pair the correction
                # with the exact input that produced it.
                st.session_state.source_text = input_text
        else:
            st.warning("Please enter some text.")
    st.divider()
    # 2. Results & Selection Section
    if st.session_state.translations_list:
        st.subheader("Select Best Translation")
        options = st.session_state.translations_list
        radio_options = [f"{i+1}. {text}" for i, text in enumerate(options)]
        selected_option_str = st.radio(
            "Top 5 Hypotheses:",
            options=radio_options,
            index=0
        )
        selected_index = radio_options.index(selected_option_str)
        final_candidate = options[selected_index]
        st.markdown("#### Review & Edit Final Output")
        final_correction = st.text_area("Final Output", value=final_candidate, height=100)
        col_save, col_status = st.columns([1, 2])
        with col_save:
            if st.button("๐พ Save to Dataset", type="primary"):
                timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                # Store normalized mBART-style language codes, not display names.
                s_code = "en_XX" if source_lang == "English" else "tgj_IN"
                t_code = "tgj_IN" if target_lang == "Tagin" else "en_XX"
                data_row = [timestamp, s_code, t_code, st.session_state.source_text, final_correction]
                if gsheet_creds and gsheet_name:
                    with st.spinner("Saving..."):
                        success, msg = save_to_gsheet(data_row, gsheet_creds, gsheet_name)
                        if success:
                            st.success("Saved!")
                            time.sleep(1)
                            st.rerun()
                        else:
                            st.error(f"Error: {msg}")
                else:
                    st.error("Missing configuration.")
    elif input_text:
        st.info("Hit 'Enter' or click 'Translate' to see suggestions.")
# Sidebar footer, pushed below the main controls with blank space.
with st.sidebar:
    st.markdown("<br>" * 5, unsafe_allow_html=True)
    st.markdown("---")
    st.caption("Made with โค๏ธ by Tungon Dugi")
    st.caption("v0.1.2")