"""Streamlit app for English<->Tagin translation with a human feedback loop.

The app loads an mBART model, generates several beam-search hypotheses for the
entered text, lets the user pick and edit the best one, and appends the result
to a Google Sheet.
"""

# NOTE: a fully commented-out copy of the previous revision (v0.1.1) used to
# precede the live code; it was removed because the code below (v0.1.2)
# supersedes it.

import json
import logging
import os
import time
from datetime import datetime

import pandas as pd  # noqa: F401  (unused in the visible code; kept deliberately)
import streamlit as st

# Try to import Google Sheets libraries
try:
    import gspread
    from oauth2client.service_account import ServiceAccountCredentials
    GSHEETS_AVAILABLE = True
except ImportError:
    GSHEETS_AVAILABLE = False

logger = logging.getLogger(__name__)

# --- CONFIGURATION ---
st.set_page_config(
    page_title="Tagin Feedback Loop",
    page_icon="📝",
    layout="centered",
    initial_sidebar_state="expanded"
)

# --- SESSION STATE INITIALIZATION ---
if "translations_list" not in st.session_state:
    st.session_state.translations_list = []
if "source_text" not in st.session_state:
    st.session_state.source_text = ""
if "translated_langs" not in st.session_state:
    # (source_lang, target_lang) used for the hypotheses currently on screen.
    st.session_state.translated_langs = None


# --- MODEL LOADING LOGIC ---
@st.cache_resource
def load_model(model_path):
    """Load the mBART tokenizer and model from ``model_path``.

    Args:
        model_path: Identifier passed unchanged to ``from_pretrained``.

    Returns:
        ``(tokenizer, model, None)`` on success, or ``(None, None, message)``
        where ``message`` is the stringified exception on failure.
    """
    try:
        from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
        import torch  # noqa: F401  (imported only so a missing torch fails here)
        tokenizer = MBart50TokenizerFast.from_pretrained(model_path)
        model = MBartForConditionalGeneration.from_pretrained(model_path)
        return tokenizer, model, None
    except Exception as e:
        return None, None, str(e)


def perform_translation_beam(text, source_lang, target_lang, model, tokenizer, num_beams=5):
    """Translate ``text`` and return the top ``num_beams`` beam-search hypotheses.

    Args:
        text: Text to translate; a falsy value short-circuits to ``[]``.
        source_lang: Display name of the source language ("English"/"Tagin").
        target_lang: Display name of the target language ("English"/"Tagin").
        model: Object exposing ``generate``.
        tokenizer: Tokenizer matching ``model``.
        num_beams: Beam width; also the number of returned sequences.

    Returns:
        A list of decoded strings, or ``[]`` if ``text`` is empty or generation
        fails (the error is shown with ``st.error``).
    """
    if not text:
        return []

    lang_map = {
        "English": "en_XX",
        # NOTE(review): the Tagin code is an empty string in the source; it
        # looks like the real token may have been lost -- confirm against the
        # model's tokenizer before relying on Tagin as source or target.
        "Tagin": ""
    }
    src_code = lang_map.get(source_lang)
    tgt_code = lang_map.get(target_lang)

    try:
        tokenizer.src_lang = src_code
        encoded_input = tokenizer(text, return_tensors="pt")

        # Prefer the tokenizer's language-code table; otherwise treat the code
        # as an ordinary vocabulary token.
        if tgt_code in tokenizer.lang_code_to_id:
            forced_bos_id = tokenizer.lang_code_to_id[tgt_code]
        else:
            forced_bos_id = tokenizer.convert_tokens_to_ids(tgt_code)

        generated_tokens = model.generate(
            **encoded_input,
            forced_bos_token_id=forced_bos_id,
            num_beams=num_beams,
            num_return_sequences=num_beams,
            max_length=128
        )
        return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    except Exception as e:
        st.error(f"Translation Error: {str(e)}")
        return []


def save_to_gsheet(data_row, creds_dict, sheet_name):
    """Append ``data_row`` to the first worksheet of ``sheet_name``.

    A header row is appended first when cell A1 is empty.

    Args:
        data_row: List of cell values to append.
        creds_dict: Service-account key data passed to
            ``ServiceAccountCredentials.from_json_keyfile_dict``.
        sheet_name: Title of the spreadsheet to open.

    Returns:
        ``(True, message)`` on success, ``(False, reason)`` otherwise. No
        exception derived from ``Exception`` propagates to the caller.
    """
    if not GSHEETS_AVAILABLE:
        return False, "Libraries 'gspread' and 'oauth2client' not installed."
    try:
        scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
        creds = ServiceAccountCredentials.from_json_keyfile_dict(creds_dict, scope)
        client = gspread.authorize(creds)
        try:
            sheet = client.open(sheet_name).sheet1
        except gspread.SpreadsheetNotFound:
            return False, f"Spreadsheet '{sheet_name}' not found."
        if not sheet.cell(1, 1).value:
            sheet.append_row(['timestamp', 'source_lang', 'target_lang', 'source_text', 'corrected_translation'])
        sheet.append_row(data_row)
        return True, f"Saved to Google Sheet '{sheet_name}'"
    except Exception as e:
        return False, str(e)


def _storage_codes(source_lang, target_lang):
    """Return the ``(source, target)`` language codes written to the sheet."""
    s_code = "en_XX" if source_lang == "English" else "tgj_IN"
    t_code = "tgj_IN" if target_lang == "Tagin" else "en_XX"
    return s_code, t_code


def _streamlit_secret(key):
    """Return ``st.secrets[key]``, or ``None`` if the key is unavailable.

    NOTE(review): assumes Streamlit signals a missing secrets file with
    ``FileNotFoundError`` (or a subclass) -- confirm for the installed version.
    Without this guard the file-uploader fallback cannot be reached when no
    secrets file exists.
    """
    try:
        if key in st.secrets:
            return st.secrets[key]
    except FileNotFoundError:
        return None
    return None


# --- SIDEBAR ---
with st.sidebar:
    st.title("⚙️ Configuration")
    st.markdown("**Active Model:**")
    model_path_input = "Repleeka/mBART-tgj-final"
    st.code(model_path_input, language=None)
    st.divider()

    st.subheader("💾 Storage Settings")
    gsheet_creds = None
    if not GSHEETS_AVAILABLE:
        st.error("⚠️ Install dependencies: `pip install gspread oauth2client`")
    gsheet_name = "GinLish_Corpus_BOT"
    st.info(f"📄 {gsheet_name}")

    # Credential lookup order: environment variable, Streamlit secrets,
    # then a manually uploaded key file.
    if "GSHEET_CREDENTIALS" in os.environ:
        try:
            gsheet_creds = json.loads(os.environ["GSHEET_CREDENTIALS"])
        except json.JSONDecodeError as e:
            # A malformed variable must not take the whole app down.
            st.error(f"Error loading secrets: {e}")
        else:
            st.success("✅ Credentials loaded (Env)")
    else:
        secret_creds = _streamlit_secret("gcp_service_account")
        if secret_creds is not None:
            gsheet_creds = secret_creds
            st.success("✅ Credentials loaded (Secrets)")
        else:
            uploaded_file = st.file_uploader("Service Account JSON", type=['json'])
            if uploaded_file:
                try:
                    gsheet_creds = json.load(uploaded_file)
                except (json.JSONDecodeError, UnicodeDecodeError):
                    st.error("Invalid JSON file")

# --- MAIN INTERFACE ---
st.title("✍️ English-to-Tagin Translator")
tokenizer, model, error_msg = load_model(model_path_input)

if error_msg:
    st.error(f"❌ Model Error: {error_msg}")
else:
    st.subheader("Source Text")

    # A form groups the inputs so they are submitted together.
    with st.form("translation_form", clear_on_submit=False):
        col_lang1, col_lang2 = st.columns(2)
        with col_lang1:
            source_lang = st.selectbox("Source", ["English", "Tagin"])
        with col_lang2:
            target_lang = st.selectbox(
                "Target",
                ["English", "Tagin"],
                index=1 if source_lang == "English" else 0
            )
        input_text = st.text_area(
            "Input",
            height=100,
            label_visibility="collapsed",
            placeholder="Enter text and press Enter (or click below) to translate...",
            key="main_input"
        )
        submit_button = st.form_submit_button(
            "Translate with Beam Search 🔍",
            type="primary",
            use_container_width=True
        )

    # Process the translation once the form has been submitted.
    if submit_button:
        if input_text:
            # Best-effort log of the raw input: a failure here is recorded but
            # never blocks the translation itself.
            if gsheet_creds and gsheet_name:
                log_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                log_s_code, log_t_code = _storage_codes(source_lang, target_lang)
                log_row = [log_timestamp, log_s_code, log_t_code, input_text, "[RAW_INPUT]"]
                logged, log_msg = save_to_gsheet(log_row, gsheet_creds, gsheet_name)
                if not logged:
                    logger.warning("Raw-input logging failed: %s", log_msg)

            with st.spinner("Generating hypotheses..."):
                results = perform_translation_beam(input_text, source_lang, target_lang, model, tokenizer)
                st.session_state.translations_list = results
                # Lock in the text and languages the hypotheses belong to, so a
                # later save cannot pair them with different selections.
                st.session_state.source_text = input_text
                st.session_state.translated_langs = (source_lang, target_lang)
        else:
            st.warning("Please enter some text.")

    st.divider()

    # 2. Results & Selection Section
    if st.session_state.translations_list:
        st.subheader("Select Best Translation")
        options = st.session_state.translations_list
        radio_options = [f"{i+1}. {text}" for i, text in enumerate(options)]
        selected_option_str = st.radio(
            "Top 5 Hypotheses:",
            options=radio_options,
            index=0
        )
        selected_index = radio_options.index(selected_option_str)
        final_candidate = options[selected_index]

        st.markdown("#### Review & Edit Final Output")
        final_correction = st.text_area("Final Output", value=final_candidate, height=100)

        col_save, col_status = st.columns([1, 2])
        with col_save:
            if st.button("💾 Save to Dataset", type="primary"):
                timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                # Fall back to the current selections only if nothing was locked in.
                saved_src, saved_tgt = (
                    st.session_state.translated_langs or (source_lang, target_lang)
                )
                s_code, t_code = _storage_codes(saved_src, saved_tgt)
                data_row = [timestamp, s_code, t_code, st.session_state.source_text, final_correction]

                if gsheet_creds and gsheet_name:
                    with st.spinner("Saving..."):
                        success, msg = save_to_gsheet(data_row, gsheet_creds, gsheet_name)
                    if success:
                        st.success("Saved!")
                        # Leave the confirmation visible briefly before rerunning.
                        time.sleep(1)
                        st.rerun()
                    else:
                        st.error(f"Error: {msg}")
                else:
                    st.error("Missing configuration.")
    elif input_text:
        st.info("Hit 'Enter' or click 'Translate' to see suggestions.")

# Spacer between the main sidebar content and the footer.
# NOTE(review): the string literal was split by a line break in the source;
# "<br>" is restored on the assumption it was an HTML break (the call passes
# unsafe_allow_html=True) -- confirm.
st.sidebar.markdown("<br>" * 5, unsafe_allow_html=True)
st.sidebar.markdown("---")
st.sidebar.caption("Made with ❤️ by Tungon Dugi")
st.sidebar.caption("v0.1.2")