"""Gradio app that restores masked residues (*) in antibody sequences with AbLang2."""

import html
import os
import sys

import gradio as gr
from transformers import AutoModel, AutoTokenizer
from transformers.utils import cached_file

MODEL_ID = "hemantn/ablang2"

# Load model and tokenizer from Hugging Face Hub
model = AutoModel.from_pretrained(MODEL_ID, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Find the cached model directory and make it importable: adapter.py is
# shipped inside the model repository rather than as an installed package.
adapter_path = cached_file(MODEL_ID, "adapter.py")
cached_model_dir = os.path.dirname(adapter_path)
sys.path.insert(0, cached_model_dir)

# Import and create the adapter (must come after the sys.path tweak above)
from adapter import AbLang2PairedHuggingFaceAdapter  # noqa: E402

ablang = AbLang2PairedHuggingFaceAdapter(model=model, tokenizer=tokenizer)

# Markers this script expects in the adapter's 'restore' output: each chain is
# wrapped in '<' ... '>' and a paired result joins the chains with '>|<'.
_CHAIN_SEPARATOR = ">|<"
_MARKER_TABLE = str.maketrans("", "", "<>")


def _anarci_available() -> bool:
    """Return True when the optional 'anarci' package can be imported."""
    try:
        import anarci  # noqa: F401
    except ImportError:
        return False
    return True


def _error_outputs(message: str) -> tuple:
    """Build the two-slot return value for an error.

    The message goes into the heavy-chain output and the light-chain output is
    left empty, so the arity always matches the two components wired to the
    click handler.
    """
    return message, ""


def _split_restored(result: str, has_heavy: bool):
    """Split one restored result string into (heavy, light) with markers removed.

    Args:
        result: Raw string returned by the adapter for one input pair.
        has_heavy: Whether the user supplied a heavy chain; used to decide
            which slot a single-chain result belongs to.

    Returns:
        A ``(heavy, light)`` tuple of plain sequences, or ``None`` when the
        string matches neither the paired nor the single-chain layout.
    """
    if _CHAIN_SEPARATOR in result:
        # Both chains present
        pieces = result.split(_CHAIN_SEPARATOR)
        return (
            pieces[0].translate(_MARKER_TABLE),
            pieces[1].translate(_MARKER_TABLE),
        )
    if result.startswith("<") and result.endswith(">"):
        # Only one chain present
        chain = result.translate(_MARKER_TABLE)
        return (chain, "") if has_heavy else ("", chain)
    return None


def highlight_restored_residues(original_seq, restored_seq):
    """Return *restored_seq* as HTML with restored residues highlighted in green.

    A residue counts as restored when the original has ``*`` at that position
    and the restored sequence has something else. The comparison is purely
    positional, so it is only meaningful when both sequences line up.
    NOTE(review): with alignment enabled the restored sequence may differ in
    length from the input, in which case highlighting may be off -- confirm.

    Args:
        original_seq (str): Input sequence, with ``*`` marking masked residues.
        restored_seq (str): Sequence returned by the model.

    Returns:
        str: HTML fragment. Restored residues are wrapped in
        ``<span class="restored-highlight">``; all residues are HTML-escaped.
        An empty/None *restored_seq* is returned unchanged.
    """
    if not restored_seq:
        return restored_seq
    if not original_seq:
        return html.escape(restored_seq)

    pieces = []
    for orig_char, rest_char in zip(original_seq, restored_seq):
        safe_char = html.escape(rest_char)
        if orig_char == "*" and rest_char != "*":
            # This residue was restored
            pieces.append(f'<span class="restored-highlight">{safe_char}</span>')
        else:
            pieces.append(safe_char)

    # Add any remaining characters from the restored sequence (empty slice
    # when it is not longer than the original).
    pieces.append(html.escape(restored_seq[len(original_seq):]))
    return "".join(pieces)


def restore_sequences(heavy_chain, light_chain, use_align=False):
    """Restore masked residues in antibody sequences.

    Args:
        heavy_chain (str): Heavy chain sequence with masked residues (*).
        light_chain (str): Light chain sequence with masked residues (*).
        use_align (bool): Whether to use alignment for variable missing lengths.

    Returns:
        tuple: ``(heavy_html, light_html)`` -- always exactly two strings, one
        per output component. On failure the first element carries the error
        message and the second is empty.
    """
    heavy = (heavy_chain or "").strip()
    light = (light_chain or "").strip()

    # Check if alignment is requested but not available
    if use_align and not _anarci_available():
        return _error_outputs(
            "Alignment feature requires 'anarci' package which is not available. "
            "Please disable alignment option."
        )

    if not heavy and not light:
        return _error_outputs("Please provide at least one antibody chain sequence.")

    # UI boundary: any failure in the model call or parsing is reported to the
    # user instead of propagating into Gradio.
    try:
        # A missing chain is passed as an empty string in its slot.
        restored = ablang([[heavy, light]], mode='restore', align=use_align)

        # len() rather than truthiness: the result may be an array-like.
        if not (hasattr(restored, '__len__') and len(restored) > 0):
            return _error_outputs("Error: No restoration result obtained.")

        chains = _split_restored(restored[0], has_heavy=bool(heavy))
        if chains is None:
            return _error_outputs("Error: Unexpected result format.")
        heavy_part, light_part = chains

        highlighted_heavy = highlight_restored_residues(heavy, heavy_part)
        highlighted_light = highlight_restored_residues(light, light_part)

        # No whitespace around the content: .restored-sequence-box uses
        # white-space: pre-wrap, so stray newlines would be rendered.
        heavy_html = f'<div class="restored-sequence-box">{highlighted_heavy}</div>'
        light_html = f'<div class="restored-sequence-box">{highlighted_light}</div>'
        return heavy_html, light_html
    except Exception as e:
        # Escaped because the message is rendered by an HTML component.
        return _error_outputs(f"Error during restoration: {html.escape(str(e))}")


CUSTOM_CSS = """
* {
    font-family: 'Courier New', monospace !important;
}
.sequence-input, .sequence-output {
    font-family: 'Courier New', monospace !important;
    font-size: 14px !important;
    letter-spacing: 0.5px !important;
}
.restored-highlight {
    background-color: #90EE90 !important;
    color: #000 !important;
    font-weight: bold !important;
}
.examples {
    font-family: 'Courier New', monospace !important;
    font-size: 14px !important;
    letter-spacing: 0.5px !important;
}
.restored-sequence-box {
    font-family: 'Courier New', monospace !important;
    font-size: 14px !important;
    letter-spacing: 0.5px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
}
.restored-heading {
    color: #2E8B57 !important;
    font-weight: bold !important;
    font-size: 18px !important;
}
.example-text {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
}
.examples-table {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    max-width: none !important;
    overflow: visible !important;
}
.examples-table td {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    max-width: none !important;
    overflow: visible !important;
    text-overflow: unset !important;
}
.sequence-output label {
    font-weight: bold !important;
    color: #495057 !important;
    font-size: 14px !important;
    margin-bottom: 5px !important;
}
/* Force full display of examples */
.examples-container {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
}
.examples-container table {
    width: 100% !important;
    table-layout: auto !important;
}
.examples-container td {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    padding: 8px !important;
    vertical-align: top !important;
}
.examples-container th {
    white-space: nowrap !important;
    padding: 8px !important;
}
/* Override any Gradio default truncation */
.examples table td {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
}
.examples table {
    table-layout: auto !important;
    width: 100% !important;
}
/* Target the specific examples component */
div[data-testid="examples"] table td {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
}
/* Force examples to show full content */
.examples table, .examples table td, .examples table th {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    table-layout: auto !important;
    width: auto !important;
    min-width: 100% !important;
}
/* Override any inline styles */
.examples * {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
}
/* Style output labels to match input labels exactly */
.output-label {
    font-weight: 600 !important;
    color: var(--label-text-color) !important;
    font-size: 14px !important;
    margin-bottom: 8px !important;
    margin-top: 16px !important;
    line-height: 1.4 !important;
    display: block !important;
}
"""

# Create Gradio interface
with gr.Blocks(
    title="AbLang2 Sequence Restorer",
    theme=gr.themes.Soft(),
    css=CUSTOM_CSS,
) as demo:
    gr.Markdown("""
    # 🧬 AbLang2 Sequence Restorer

    This app uses the AbLang2 model to restore masked residues (*) in antibody sequences. You can provide either one or both heavy and light chain sequences.

    **Instructions:**
    - Use `*` to mask residues you want to restore
    - Provide heavy chain, light chain, or both
    - Enable "Use Alignment" for variable missing lengths
    """)

    with gr.Row():
        with gr.Column():
            heavy_input = gr.Textbox(
                label="Heavy Chain Sequence",
                placeholder="Enter heavy chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"],
            )
            light_input = gr.Textbox(
                label="Light Chain Sequence",
                placeholder="Enter light chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"],
            )
            align_checkbox = gr.Checkbox(
                label="Use Alignment (for variable missing lengths) - Requires anarci package",
                value=False,
            )
            restore_btn = gr.Button("🔄 Restore Sequences", variant="primary")

        with gr.Column():
            gr.Markdown("### 🧬 Restored Sequences", elem_classes=["restored-heading"])
            gr.Markdown("*Green highlighting shows restored residues*")
            gr.Markdown("**Heavy Chain Sequence**", elem_classes=["output-label"])
            heavy_output = gr.HTML(label="")
            gr.Markdown("**Light Chain Sequence**", elem_classes=["output-label"])
            light_output = gr.HTML(label="")

    # Example sequences: paired chains, heavy only, light only
    gr.Examples(
        examples=[
            [
                "EVQ***SGGEVKKPGASVKVSCRASGYTFRNYGLTWVRQAPGQGLEWMGWISAYNGNTNYAQKFQGRVTLTTDTSTSTAYMELRSLRSDDTAVYFCAR**PGHGAAFMDVWGTGTTVTVSS",
                "DIQLTQSPLSLPVTLGQPASISCRSS*SLEASDTNIYLSWFQQRPGQSPRRLIYKI*NRDSGVPDRFSGSGSGTHFTLRISRVEADDVAVYYCMQGTHWPPAFGQGTKVDIK",
            ],
            [
                "EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMGWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDY**GMDVWGQGTTVTVSS",
                "",
            ],
            [
                "",
                "DIQLTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIY*ASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTP*TFGQGTKVEIK",
            ],
        ],
        inputs=[heavy_input, light_input],
        label="Example Sequences",
    )

    # Connect the button to the function; restore_sequences always returns
    # exactly one value per output component listed here.
    restore_btn.click(
        fn=restore_sequences,
        inputs=[heavy_input, light_input, align_checkbox],
        outputs=[heavy_output, light_output],
    )

    gr.Markdown("""
    ---

    **Note:** This app uses the AbLang2 model from Hugging Face Hub. The restoration process may take a few seconds depending on sequence length and complexity.
    """)

if __name__ == "__main__":
    demo.launch()