repleeka committed on
Commit
a25d063
·
verified ·
1 Parent(s): fc62187

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -103
app.py CHANGED
@@ -355,24 +355,18 @@ def save_to_gsheet(data_row, creds_dict, sheet_name):
355
  return False, "Libraries 'gspread' and 'oauth2client' not installed."
356
 
357
  try:
358
- # Define scope
359
  scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
360
-
361
- # Authenticate using the dictionary (from secrets or file)
362
  creds = ServiceAccountCredentials.from_json_keyfile_dict(creds_dict, scope)
363
  client = gspread.authorize(creds)
364
 
365
- # Open the sheet
366
  try:
367
  sheet = client.open(sheet_name).sheet1
368
  except gspread.SpreadsheetNotFound:
369
- return False, f"Spreadsheet '{sheet_name}' not found. Please share it with the service account email."
370
 
371
- # Check if header exists (simple check: is cell A1 empty?)
372
  if not sheet.cell(1, 1).value:
373
  sheet.append_row(['timestamp', 'source_lang', 'target_lang', 'source_text', 'corrected_translation'])
374
 
375
- # Append data
376
  sheet.append_row(data_row)
377
  return True, f"Saved to Google Sheet '{sheet_name}'"
378
 
@@ -382,103 +376,76 @@ def save_to_gsheet(data_row, creds_dict, sheet_name):
382
  # --- SIDEBAR ---
383
  with st.sidebar:
384
  st.title("βš™οΈ Configuration")
385
-
386
- # Fixed Model Path
387
  st.markdown("**Active Model:**")
388
  model_path_input = "Repleeka/mBART-tgj-final"
389
  st.code(model_path_input, language=None)
390
 
391
  st.divider()
392
-
393
  st.subheader("πŸ’Ύ Storage Settings")
394
- st.caption("All corrections are saved to Google Sheets.")
395
 
396
  gsheet_creds = None
397
-
398
- # Check dependencies
399
  if not GSHEETS_AVAILABLE:
400
- st.error("⚠️ Install gspread: `pip install gspread oauth2client`")
401
 
402
- # Fixed Google Sheet Name
403
  gsheet_name = "GinLish_Corpus_BOT"
404
- st.markdown("**Target Database:**")
405
  st.info(f"πŸ“„ {gsheet_name}")
406
 
407
- # SECURITY UPDATE: Check for Secrets first (HuggingFace Spaces / Streamlit Cloud)
408
  if "GSHEET_CREDENTIALS" in os.environ:
409
- try:
410
- gsheet_creds = json.loads(os.environ["GSHEET_CREDENTIALS"])
411
- st.success("βœ… Credentials loaded from Environment Secrets")
412
- except Exception as e:
413
- st.error(f"Error loading secrets: {e}")
414
  elif "gcp_service_account" in st.secrets:
415
- # Support for Streamlit Cloud native secrets
416
- gsheet_creds = st.secrets["gcp_service_account"]
417
- st.success("βœ… Credentials loaded from Streamlit Secrets")
418
  else:
419
- # Fallback to file uploader for local testing
420
- uploaded_file = st.file_uploader("Service Account JSON", type=['json'], help="Upload for local testing. In prod, use Secrets.")
421
- if uploaded_file is not None:
422
- try:
423
- gsheet_creds = json.load(uploaded_file)
424
- st.success("Credentials loaded from file!")
425
- except:
426
- st.error("Invalid JSON file")
427
-
428
- st.divider()
429
 
430
  # --- MAIN INTERFACE ---
431
- st.title("✍️ English-to-Tagin Translator & Corrector")
432
- st.markdown("Generate multiple hypotheses, choose the best one, and save it for retraining.")
433
-
434
- # Load Model
435
  tokenizer, model, error_msg = load_model(model_path_input)
436
 
437
  if error_msg:
438
  st.error(f"❌ Model Error: {error_msg}")
439
  else:
440
- # 1. Input Section
441
  st.subheader("Source Text")
442
 
443
- col_lang1, col_lang2 = st.columns(2)
444
- with col_lang1:
445
- source_lang = st.selectbox("Source", ["English", "Tagin"])
446
- with col_lang2:
447
- target_lang = st.selectbox("Target", ["English", "Tagin"], index=1 if source_lang=="English" else 0)
448
-
449
- input_text = st.text_area(
450
- "Input",
451
- height=100,
452
- label_visibility="collapsed",
453
- placeholder="Enter text to translate...",
454
- key="main_input"
455
- )
456
-
457
- if st.button("Translate with Beam Search πŸ”", type="primary", use_container_width=True):
 
 
 
 
 
458
  if input_text:
459
- # --- AUTO-SAVE SOURCE TEXT LOGIC ---
460
- # This block saves the raw input immediately, regardless of future edits
461
  if gsheet_creds and gsheet_name:
462
  try:
463
- # Prepare Data Row for Auto-Log
464
  log_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
465
  log_s_code = "en_XX" if source_lang == "English" else "tgj_IN"
466
  log_t_code = "tgj_IN" if target_lang == "Tagin" else "en_XX"
467
-
468
- # We save with "[RAW_INPUT]" as the translation to indicate it hasn't been verified/edited yet
469
  log_row = [log_timestamp, log_s_code, log_t_code, input_text, "[RAW_INPUT]"]
470
-
471
- # Save silently (no spinner to avoid slowing down UX too much)
472
  save_to_gsheet(log_row, gsheet_creds, gsheet_name)
473
- print(f"Logged input: {input_text}") # Console log for debugging
474
- except Exception as e:
475
- print(f"Failed to auto-log input: {e}")
476
- # -----------------------------------
477
 
478
- with st.spinner("Generating top 5 hypotheses..."):
479
  results = perform_translation_beam(input_text, source_lang, target_lang, model, tokenizer)
480
  st.session_state.translations_list = results
481
- st.session_state.source_text = input_text # Lock in source text
482
  else:
483
  st.warning("Please enter some text.")
484
 
@@ -487,12 +454,11 @@ else:
487
  # 2. Results & Selection Section
488
  if st.session_state.translations_list:
489
  st.subheader("Select Best Translation")
490
-
491
  options = st.session_state.translations_list
492
  radio_options = [f"{i+1}. {text}" for i, text in enumerate(options)]
493
 
494
  selected_option_str = st.radio(
495
- "Top 5 Hypotheses (AI Suggestions):",
496
  options=radio_options,
497
  index=0
498
  )
@@ -501,55 +467,32 @@ else:
501
  final_candidate = options[selected_index]
502
 
503
  st.markdown("#### Review & Edit Final Output")
504
- st.caption("If none of the above are perfect, edit the text below before saving.")
505
-
506
  final_correction = st.text_area("Final Output", value=final_candidate, height=100)
507
 
508
  col_save, col_status = st.columns([1, 2])
509
-
510
  with col_save:
511
  if st.button("πŸ’Ύ Save to Dataset", type="primary"):
512
-
513
- # Prepare Data Row
514
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
515
- # Simplified language codes for storage
516
  s_code = "en_XX" if source_lang == "English" else "tgj_IN"
517
  t_code = "tgj_IN" if target_lang == "Tagin" else "en_XX"
518
-
519
  data_row = [timestamp, s_code, t_code, st.session_state.source_text, final_correction]
520
 
521
- # Execute Save Strategy
522
- success = False
523
- msg = ""
524
-
525
  if gsheet_creds and gsheet_name:
526
- with st.spinner("Connecting to Google Sheets..."):
527
  success, msg = save_to_gsheet(data_row, gsheet_creds, gsheet_name)
 
 
 
 
 
 
528
  else:
529
- msg = "Missing Credentials or Sheet Name."
530
-
531
- if success:
532
- st.success(f"Saved! ({msg})")
533
- time.sleep(1)
534
- st.rerun()
535
- else:
536
- st.error(f"Save Failed: {msg}")
537
-
538
- with col_status:
539
- st.caption(f"Saving to Google Sheet: `{gsheet_name}`")
540
 
541
  elif input_text:
542
- st.info("Hit 'Translate' to see suggestions.")
543
 
544
- # Create some space between main sidebar content and footer
545
  st.sidebar.markdown("<br>" * 5, unsafe_allow_html=True)
546
  st.sidebar.markdown("---")
547
  st.sidebar.caption("Made with ❀️ by Tungon Dugi")
548
- st.sidebar.caption("Contact: tungondugi@gmail.com")
549
-
550
- # Or using columns in sidebar:
551
- col1, col2 = st.sidebar.columns(2)
552
- with col1:
553
- st.caption("Β© 2026")
554
- with col2:
555
- st.caption("v0.1.2")
 
355
  return False, "Libraries 'gspread' and 'oauth2client' not installed."
356
 
357
  try:
 
358
  scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
 
 
359
  creds = ServiceAccountCredentials.from_json_keyfile_dict(creds_dict, scope)
360
  client = gspread.authorize(creds)
361
 
 
362
  try:
363
  sheet = client.open(sheet_name).sheet1
364
  except gspread.SpreadsheetNotFound:
365
+ return False, f"Spreadsheet '{sheet_name}' not found."
366
 
 
367
  if not sheet.cell(1, 1).value:
368
  sheet.append_row(['timestamp', 'source_lang', 'target_lang', 'source_text', 'corrected_translation'])
369
 
 
370
  sheet.append_row(data_row)
371
  return True, f"Saved to Google Sheet '{sheet_name}'"
372
 
 
376
  # --- SIDEBAR ---
377
  with st.sidebar:
378
  st.title("βš™οΈ Configuration")
 
 
379
  st.markdown("**Active Model:**")
380
  model_path_input = "Repleeka/mBART-tgj-final"
381
  st.code(model_path_input, language=None)
382
 
383
  st.divider()
 
384
  st.subheader("πŸ’Ύ Storage Settings")
 
385
 
386
  gsheet_creds = None
 
 
387
  if not GSHEETS_AVAILABLE:
388
+ st.error("⚠️ Install dependencies: `pip install gspread oauth2client`")
389
 
 
390
  gsheet_name = "GinLish_Corpus_BOT"
 
391
  st.info(f"πŸ“„ {gsheet_name}")
392
 
 
393
  if "GSHEET_CREDENTIALS" in os.environ:
394
+ gsheet_creds = json.loads(os.environ["GSHEET_CREDENTIALS"])
395
+ st.success("βœ… Credentials loaded (Env)")
 
 
 
396
  elif "gcp_service_account" in st.secrets:
397
+ gsheet_creds = st.secrets["gcp_service_account"]
398
+ st.success("βœ… Credentials loaded (Secrets)")
 
399
  else:
400
+ uploaded_file = st.file_uploader("Service Account JSON", type=['json'])
401
+ if uploaded_file:
402
+ gsheet_creds = json.load(uploaded_file)
 
 
 
 
 
 
 
403
 
404
  # --- MAIN INTERFACE ---
405
+ st.title("✍️ English-to-Tagin Translator")
 
 
 
406
  tokenizer, model, error_msg = load_model(model_path_input)
407
 
408
  if error_msg:
409
  st.error(f"❌ Model Error: {error_msg}")
410
  else:
 
411
  st.subheader("Source Text")
412
 
413
+ # Use a form to allow 'Enter' key submission
414
+ with st.form("translation_form", clear_on_submit=False):
415
+ col_lang1, col_lang2 = st.columns(2)
416
+ with col_lang1:
417
+ source_lang = st.selectbox("Source", ["English", "Tagin"])
418
+ with col_lang2:
419
+ target_lang = st.selectbox("Target", ["English", "Tagin"], index=1 if source_lang=="English" else 0)
420
+
421
+ input_text = st.text_area(
422
+ "Input",
423
+ height=100,
424
+ label_visibility="collapsed",
425
+ placeholder="Enter text and press Enter (or click below) to translate...",
426
+ key="main_input"
427
+ )
428
+
429
+ submit_button = st.form_submit_button("Translate with Beam Search πŸ”", type="primary", use_container_width=True)
430
+
431
+ # Process translation when button is clicked OR Enter is pressed
432
+ if submit_button:
433
  if input_text:
434
+ # Auto-log raw input
 
435
  if gsheet_creds and gsheet_name:
436
  try:
 
437
  log_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
438
  log_s_code = "en_XX" if source_lang == "English" else "tgj_IN"
439
  log_t_code = "tgj_IN" if target_lang == "Tagin" else "en_XX"
 
 
440
  log_row = [log_timestamp, log_s_code, log_t_code, input_text, "[RAW_INPUT]"]
 
 
441
  save_to_gsheet(log_row, gsheet_creds, gsheet_name)
442
+ except:
443
+ pass
 
 
444
 
445
+ with st.spinner("Generating hypotheses..."):
446
  results = perform_translation_beam(input_text, source_lang, target_lang, model, tokenizer)
447
  st.session_state.translations_list = results
448
+ st.session_state.source_text = input_text
449
  else:
450
  st.warning("Please enter some text.")
451
 
 
454
  # 2. Results & Selection Section
455
  if st.session_state.translations_list:
456
  st.subheader("Select Best Translation")
 
457
  options = st.session_state.translations_list
458
  radio_options = [f"{i+1}. {text}" for i, text in enumerate(options)]
459
 
460
  selected_option_str = st.radio(
461
+ "Top 5 Hypotheses:",
462
  options=radio_options,
463
  index=0
464
  )
 
467
  final_candidate = options[selected_index]
468
 
469
  st.markdown("#### Review & Edit Final Output")
 
 
470
  final_correction = st.text_area("Final Output", value=final_candidate, height=100)
471
 
472
  col_save, col_status = st.columns([1, 2])
 
473
  with col_save:
474
  if st.button("πŸ’Ύ Save to Dataset", type="primary"):
 
 
475
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 
476
  s_code = "en_XX" if source_lang == "English" else "tgj_IN"
477
  t_code = "tgj_IN" if target_lang == "Tagin" else "en_XX"
 
478
  data_row = [timestamp, s_code, t_code, st.session_state.source_text, final_correction]
479
 
 
 
 
 
480
  if gsheet_creds and gsheet_name:
481
+ with st.spinner("Saving..."):
482
  success, msg = save_to_gsheet(data_row, gsheet_creds, gsheet_name)
483
+ if success:
484
+ st.success("Saved!")
485
+ time.sleep(1)
486
+ st.rerun()
487
+ else:
488
+ st.error(f"Error: {msg}")
489
  else:
490
+ st.error("Missing configuration.")
 
 
 
 
 
 
 
 
 
 
491
 
492
  elif input_text:
493
+ st.info("Hit 'Enter' or click 'Translate' to see suggestions.")
494
 
 
495
  st.sidebar.markdown("<br>" * 5, unsafe_allow_html=True)
496
  st.sidebar.markdown("---")
497
  st.sidebar.caption("Made with ❀️ by Tungon Dugi")
498
+ st.sidebar.caption("v0.1.2")