"""Gradio viewer for browsing translation samples stored in a JSONL file.

Each line of the dataset is expected to be a JSON object.  The viewer reads
the ``language``, ``translation_mode``, ``categories`` ('|'-separated),
``text``, ``translit_text`` and ``original_text`` fields of every record and
lets the user filter samples by mode, language and category.
"""
import html
import json
import os

import gradio as gr

# Location of the dataset.  The SAMPLE_VIEWER_DATA environment variable takes
# precedence so the viewer can run on machines other than the one this
# absolute path was written for.
DEFAULT_DATA_PATH = os.environ.get(
    "SAMPLE_VIEWER_DATA",
    '/Users/suyashsrivastava/f_5_repro/sample_viewer/samples.jsonl',
)


def _split_categories(item):
    """Return the non-empty '|'-separated category names of ``item``.

    A missing, ``None`` or non-string ``categories`` value yields an empty
    list, so callers never have to guard the lookup themselves.
    """
    raw = item.get('categories')
    if not isinstance(raw, str):
        return []
    return [cat for cat in raw.split('|') if cat]


# Load JSONL data
def load_data(path=DEFAULT_DATA_PATH):
    """Load samples from the JSONL file at ``path``.

    Lines that are not valid JSON, lines whose JSON value is not an object,
    and records flagged as dropped are skipped.

    Args:
        path: Path of the UTF-8 encoded JSONL file to read.

    Returns:
        A list with one dict per retained record, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    records = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            try:
                item = json.loads(line)
            except json.JSONDecodeError:
                continue  # Skip invalid JSON lines
            if not isinstance(item, dict):
                continue  # Valid JSON but not a record (e.g. a bare list)
            # Only include non-dropped items.  The flag has historically been
            # read as "droped"; the correctly spelled key is honoured as well.
            # NOTE(review): confirm which spelling the dataset actually writes.
            if item.get("droped", False) or item.get("dropped", False):
                continue
            records.append(item)
    return records


data = load_data()
print(f"Loaded {len(data)} valid samples")

# Unique dropdown values; records with a missing or empty field are ignored
languages = sorted({d['language'] for d in data if d.get('language')})
modes = sorted({d['translation_mode'] for d in data if d.get('translation_mode')})
all_categories = sorted({cat for d in data for cat in _split_categories(d)})

print(f"Found {len(languages)} languages, {len(modes)} modes, and {len(all_categories)} categories")


def get_categories_for_selection(language, mode):
    """Get categories available for the selected language and mode.

    Args:
        language: Selected language, or a falsy value when none is selected.
        mode: Selected translation mode, or a falsy value when none is selected.

    Returns:
        A sorted list of the non-empty category names found on records that
        match both selections; empty when either selection is missing.
    """
    if not language or not mode:
        return []

    found = set()
    for item in data:
        if item.get('language') == language and item.get('translation_mode') == mode:
            found.update(_split_categories(item))
    return sorted(found)


def _render_sample(index, item):
    """Build the display fragment for one sample, numbered ``index``.

    Field values are HTML-escaped because the fragment is rendered by a
    ``gr.HTML`` component and dataset text may contain ``<`` or ``&``.
    """
    text = html.escape(str(item.get('text', '')))
    translit = html.escape(str(item.get('translit_text') or '(None)'))
    original = html.escape(str(item.get('original_text', '')))
    # NOTE(review): the fragment separates fields with plain newlines, which
    # HTML rendering collapses; wrap the fields in block elements if each one
    # should appear on its own line.
    return (
        f"\nSample {index}\n\n"
        f"Text:\n{text}\n\n"
        f"Translit:\n{translit}\n\n"
        f"Original:\n{original}\n\n"
    )


def filter_samples(mode, language, category):
    """Return the display fragment for all samples matching the three filters.

    Args:
        mode: Translation mode to match exactly.
        language: Language to match exactly.
        category: Category that must be one of the record's categories.

    Returns:
        A prompt to complete the selection when any filter is empty, the
        concatenated fragments of every matching sample, or a "no matches"
        message followed by debug information.
    """
    if not mode or not language or not category:
        return "Please select all filters (Translation Mode, Language, and Category)"

    print(f"Filtering for: Mode={mode}, Language={language}, Category={category}")

    filtered_samples = []
    for item in data:
        # Records lacking a field can never equal a non-empty filter value,
        # so the .get() comparisons also skip incomplete records.
        if item.get('translation_mode') != mode:
            continue
        if item.get('language') != language:
            continue
        if category not in _split_categories(item):
            continue
        filtered_samples.append(_render_sample(len(filtered_samples) + 1, item))

    total_checked = len(data)
    print(f"Checked {total_checked} items, found {len(filtered_samples)} matches")

    if filtered_samples:
        return "".join(filtered_samples)

    debug_info = (
        f"\n\nDebug info: Checked {total_checked} items, found 0 matches for "
        f"Mode={html.escape(str(mode))}, Language={html.escape(str(language))}, "
        f"Category={html.escape(str(category))}\n\n"
    )
    return (
        "\n\nNo matching samples found for the selected filters. "
        f"Try different filter combinations.\n\n{debug_info}"
    )


# Custom CSS for dark theme
custom_css = """
footer {visibility: hidden}
.gradio-container { background-color: #1a202c; color: #e2e8f0; }
.dark h1, .dark h2, .dark h3 { color: #e2e8f0 !important; }
.gradio-dropdown { background-color: #2d3748; color: #e2e8f0; border-color: #4a5568; }
.dark button.primary { background-color: #4299e1 !important; }
.dark label { color: #e2e8f0 !important; }
.dark p { color: #e2e8f0 !important; }
"""

# Gradio interface using Blocks
# NOTE(review): "dark" does not look like one of Gradio's built-in theme
# names -- confirm it resolves to the intended theme on the installed version.
with gr.Blocks(title="Sample Viewer", css=custom_css, theme="dark") as demo:
    gr.Markdown("## 📘 Multilingual Sample Viewer")
    gr.Markdown("Select filters to view samples from the dataset")

    with gr.Row():
        mode_input = gr.Dropdown(choices=[""] + modes, label="Translation Mode", value="")
        lang_input = gr.Dropdown(choices=[""] + languages, label="Language", value="")
        cat_input = gr.Dropdown(choices=[""], label="Category", value="")

    # Update categories when language or mode changes
    def update_categories(language, mode):
        """Rebuild the category dropdown for the current language and mode.

        An incomplete selection yields no categories, which leaves only the
        empty placeholder choice.
        """
        categories = get_categories_for_selection(language, mode)
        return gr.Dropdown(choices=[""] + categories, value="")

    # Set up dependencies to update categories dropdown
    lang_input.change(
        fn=update_categories,
        inputs=[lang_input, mode_input],
        outputs=cat_input,
    )
    mode_input.change(
        fn=update_categories,
        inputs=[lang_input, mode_input],
        outputs=cat_input,
    )

    submit_btn = gr.Button("🔍 Show Samples", variant="primary")
    output_display = gr.HTML(label="Results")

    submit_btn.click(
        fn=filter_samples,
        inputs=[mode_input, lang_input, cat_input],
        outputs=output_display,
    )

if __name__ == "__main__":
    demo.launch(share=True)