Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import json | |
# Load JSONL data
def load_data(path='/Users/suyashsrivastava/f_5_repro/sample_viewer/samples.jsonl'):
    """Load sample records from a JSON Lines file.

    Each line is parsed on its own. Lines that are not valid JSON, lines
    whose top-level value is not a JSON object, and records whose
    ``"droped"`` flag is truthy are left out of the result.

    Args:
        path: Location of the ``.jsonl`` file. The default is a hard-coded
            absolute path under ``/Users/...``; pass an explicit path when
            the file lives somewhere else.

    Returns:
        A list of dicts, one per retained record, in file order.

    Raises:
        OSError: If ``path`` cannot be opened (for example
            ``FileNotFoundError``).
    """
    data = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            # Keep the try body to the one call that can raise.
            try:
                item = json.loads(line)
            except json.JSONDecodeError:
                continue  # Skip blank or malformed lines
            # A line such as `[1, 2]` or `"text"` is valid JSON but has no
            # .get(); skip it instead of crashing with AttributeError.
            if not isinstance(item, dict):
                continue
            # NOTE(review): the flag is spelled "droped" here - confirm that
            # this matches the key actually written to the data file.
            if not item.get("droped", False):  # Only include non-dropped items
                data.append(item)
    return data
# Read the dataset once at import time; everything below works off this list.
data = load_data()
print(f"Loaded {len(data)} valid samples")

# Distinct, non-empty values used to populate the dropdowns
languages = sorted({rec['language'] for rec in data if rec.get('language')})
modes = sorted({rec['translation_mode'] for rec in data if rec.get('translation_mode')})
# 'categories' holds several names joined with '|'; flatten them into one set.
all_categories = sorted({
    name
    for rec in data
    if rec.get('categories')
    for name in rec['categories'].split('|')
    if name
})
print(f"Found {len(languages)} languages, {len(modes)} modes, and {len(all_categories)} categories")
def get_categories_for_selection(language, mode):
    """Return the sorted category names available for a language/mode pair.

    Scans the module-level ``data`` list for records whose ``language`` and
    ``translation_mode`` equal the arguments and collects the
    ``|``-separated names found in their ``categories`` field.

    Args:
        language: Value compared against each record's ``language``.
        mode: Value compared against each record's ``translation_mode``.

    Returns:
        A sorted list of unique, non-empty category names. The list is empty
        when either argument is falsy or when no record matches.
    """
    if not language or not mode:
        return []
    filtered_categories = set()
    for item in data:
        if item.get('language') != language or item.get('translation_mode') != mode:
            continue
        # Apply the same guards used when building the module-level
        # all_categories list: ignore a missing/empty/None field and drop
        # the empty names produced by "a||b" or a trailing "|".
        raw = item.get('categories')
        if not raw:
            continue
        filtered_categories.update(name for name in raw.split('|') if name)
    return sorted(filtered_categories)
def _escape_html(value):
    """Return ``str(value)`` with HTML special characters escaped."""
    import html  # local import keeps this block self-contained

    return html.escape(str(value))


def _render_sample_card(index, item):
    """Build the HTML card for one matching record, numbered ``index``."""
    # Record fields are free text from the data file; escape them so that
    # characters such as < and & are displayed instead of parsed as markup.
    text = _escape_html(item.get('text', ''))
    translit = _escape_html(item.get('translit_text') or '(None)')
    original = _escape_html(item.get('original_text', ''))
    return f"""
<div style="margin-bottom: 20px; border: 1px solid #4a5568; border-radius: 8px; overflow: hidden; background-color: #2d3748;">
<div style="background-color: #1a202c; padding: 10px; border-bottom: 1px solid #4a5568;">
<strong style="color: #e2e8f0;">Sample {index}</strong>
</div>
<div style="padding: 15px;">
<p><strong style="color:#90cdf4;">Text:</strong><br><span style="color:#e2e8f0;">{text}</span></p>
<p><strong style="color:#9ae6b4;">Translit:</strong><br><span style="color:#e2e8f0;">{translit}</span></p>
<p><strong style="color:#fbd38d;">Original:</strong><br><span style="color:#e2e8f0;">{original}</span></p>
</div>
</div>
"""


def filter_samples(mode, language, category):
    """Render every record matching the three filters as an HTML fragment.

    A record from the module-level ``data`` list matches when its
    ``translation_mode`` and ``language`` equal the arguments and
    ``category`` is one of the ``|``-separated names in its ``categories``
    field. Records lacking any of those keys, or whose ``categories`` value
    is not a string, are skipped.

    Args:
        mode: Selected translation mode.
        language: Selected language.
        category: Selected category name.

    Returns:
        A plain prompt string when any filter is empty; otherwise the
        concatenated HTML cards of all matches, or an HTML "no matches"
        message with debug counts when nothing matched.
    """
    if not mode or not language or not category:
        return "Please select all filters (Translation Mode, Language, and Category)"

    print(f"Filtering for: Mode={mode}, Language={language}, Category={category}")

    required_keys = ('translation_mode', 'language', 'categories')
    cards = []
    for item in data:
        # Skip items missing required fields
        if not all(key in item for key in required_keys):
            continue
        if item['translation_mode'] != mode or item['language'] != language:
            continue
        categories = item['categories']
        # A None / non-string value cannot be split; treat it as "no match"
        # rather than raising AttributeError in the middle of a request.
        if not isinstance(categories, str) or category not in categories.split('|'):
            continue
        # Cards are numbered in match order, starting at 1.
        cards.append(_render_sample_card(len(cards) + 1, item))

    total_checked = len(data)
    print(f"Checked {total_checked} items, found {len(cards)} matches")

    if cards:
        return "".join(cards)

    # The filter values are echoed back into HTML, so escape them as well.
    debug_info = (
        f"<p style='color: #e2e8f0;'>Debug info: Checked {total_checked} items, found 0 matches for "
        f"Mode={_escape_html(mode)}, Language={_escape_html(language)}, Category={_escape_html(category)}</p>"
    )
    return f"<p style='color: #f56565;'><strong>No matching samples found for the selected filters.</strong> Try different filter combinations.</p>{debug_info}"
# Custom CSS for dark theme.
# Passed to gr.Blocks(css=...) below. The rules hide the page footer, give the
# container and dropdowns dark backgrounds with light text, and force light
# text / a blue primary button on elements nested under a `.dark` ancestor.
custom_css = """
footer {visibility: hidden}
.gradio-container {
background-color: #1a202c;
color: #e2e8f0;
}
.dark h1, .dark h2, .dark h3 {
color: #e2e8f0 !important;
}
.gradio-dropdown {
background-color: #2d3748;
color: #e2e8f0;
border-color: #4a5568;
}
.dark button.primary {
background-color: #4299e1 !important;
}
.dark label {
color: #e2e8f0 !important;
}
.dark p {
color: #e2e8f0 !important;
}
"""
# Assemble the UI: three filter dropdowns, a trigger button and an HTML results pane.
# NOTE(review): "dark" may not be a theme name recognised by the installed
# Gradio release - confirm against the version this app is deployed with.
with gr.Blocks(title="Sample Viewer", css=custom_css, theme="dark") as demo:
    gr.Markdown("## 📘 Multilingual Sample Viewer")
    gr.Markdown("Select filters to view samples from the dataset")

    with gr.Row():
        mode_input = gr.Dropdown(choices=[""] + modes, label="Translation Mode", value="")
        lang_input = gr.Dropdown(choices=[""] + languages, label="Language", value="")
        cat_input = gr.Dropdown(choices=[""], label="Category", value="")

    def update_categories(language, mode):
        """Rebuild the Category dropdown for the current language/mode pair."""
        if language and mode:
            options = [""] + get_categories_for_selection(language, mode)
        else:
            # Without both selections only the blank placeholder is offered.
            options = [""]
        return gr.Dropdown(choices=options, value="")

    # A change to either upstream dropdown invalidates the category list, so
    # both get the same handler (language first, then mode).
    for _trigger in (lang_input, mode_input):
        _trigger.change(
            fn=update_categories,
            inputs=[lang_input, mode_input],
            outputs=cat_input,
        )

    submit_btn = gr.Button("🔍 Show Samples", variant="primary")
    output_display = gr.HTML(label="Results")

    # Render the matching samples into the HTML pane when the button is pressed.
    submit_btn.click(
        fn=filter_samples,
        inputs=[mode_input, lang_input, cat_input],
        outputs=output_display,
    )
# Launch the app only when this file is executed as a script, not on import.
# share=True is forwarded to Gradio's launch(); per the Gradio docs it requests
# a public share link in addition to the local server.
if __name__ == "__main__":
    demo.launch(share=True)