"""Gradio app that restores masked residues (*) in antibody sequences with AbLang2."""

import html
import os
import sys

import gradio as gr
from transformers import AutoModel, AutoTokenizer
from transformers.utils import cached_file

# Hugging Face Hub repository that hosts the model weights and the adapter code.
MODEL_REPO = "hemantn/ablang2"

# Global variables for model loading (populated lazily by load_model()).
model = None
tokenizer = None
ablang = None


def load_model():
    """Load the AbLang2 model, tokenizer and adapter once and cache them.

    The adapter module is downloaded from the Hub and its directory is put on
    ``sys.path`` so that ``from adapter import ...`` resolves.

    Returns:
        The cached ``AbLang2PairedHuggingFaceAdapter`` instance.

    Raises:
        Exception: Any error raised while downloading or loading is logged
            and re-raised unchanged.
    """
    global model, tokenizer, ablang

    # Guard on the object we actually return: a previous attempt may have
    # loaded the model but failed before the adapter was created.
    if ablang is not None:
        return ablang

    print("Loading AbLang2 model...")
    try:
        from huggingface_hub import hf_hub_download

        # Download all necessary files; only the adapter path is needed
        # afterwards, the other two calls are made for their caching effect.
        adapter_path = hf_hub_download(MODEL_REPO, "adapter.py")
        hf_hub_download(MODEL_REPO, "configuration_ablang2paired.py")
        hf_hub_download(MODEL_REPO, "modeling_ablang2paired.py")

        cached_model_dir = os.path.dirname(adapter_path)
        print(f"Model directory: {cached_model_dir}")

        # Add the model directory to the Python path.
        if cached_model_dir not in sys.path:
            sys.path.insert(0, cached_model_dir)

        # Create an __init__.py so the model directory is a Python package.
        init_file = os.path.join(cached_model_dir, "__init__.py")
        if not os.path.exists(init_file):
            with open(init_file, "w", encoding="utf-8") as f:
                f.write("# Auto-generated __init__.py for AbLang2 model files\n")
            print(f"Created __init__.py in {cached_model_dir}")

        loaded_model = AutoModel.from_pretrained(MODEL_REPO, trust_remote_code=True)
        loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO, trust_remote_code=True)

        # Importable only after the sys.path manipulation above.
        from adapter import AbLang2PairedHuggingFaceAdapter

        adapter_instance = AbLang2PairedHuggingFaceAdapter(
            model=loaded_model, tokenizer=loaded_tokenizer
        )
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        raise

    # Publish the globals only after everything succeeded, so a failed
    # attempt never leaves a half-initialised cache behind.
    model, tokenizer, ablang = loaded_model, loaded_tokenizer, adapter_instance
    print("Model loaded successfully!")
    return ablang


def _strip_chain_tokens(chain):
    """Remove the '<' and '>' delimiter characters from a restored chain."""
    return chain.replace('<', '').replace('>', '')


def restore_sequences(heavy_chain, light_chain, use_align=False):
    """Restore masked residues in antibody sequences.

    Args:
        heavy_chain (str): Heavy chain sequence with masked residues (*).
        light_chain (str): Light chain sequence with masked residues (*).
        use_align (bool): Whether to use alignment for variable missing lengths.

    Returns:
        tuple: ``(heavy_html, light_html)`` - HTML snippets for the two
        ``gr.HTML`` outputs. On failure the first element is an (HTML-escaped)
        error message and the second is an empty string.
    """
    try:
        # Treat a missing value like an empty one.
        heavy = (heavy_chain or "").strip()
        light = (light_chain or "").strip()
        if not heavy and not light:
            return "Please provide at least one antibody chain sequence.", ""

        # A chain that was not provided is passed as an empty string.
        sequences = [[heavy, light]]

        # Load model if not already loaded.
        ablang_instance = load_model()

        # Perform restoration.
        restored = ablang_instance(sequences, mode='restore', align=use_align)
        if restored is None:
            return "Error: No restoration result returned from model.", ""

        # Debug: log the input and output for troubleshooting.
        print(f"Input sequences: {sequences}")
        print(f"Restored result: {restored}")
        print(f"Result type: {type(restored)}")

        # Unwrap the first (and only) result from lists / array-likes.
        if isinstance(restored, str):
            result = restored
        elif hasattr(restored, '__len__') and len(restored) > 0:
            result = restored[0]
        else:
            # Escaped because repr(type) looks like "<class ...>" and would
            # otherwise be swallowed as a tag by the HTML output component.
            return (
                f"Error: Unexpected result type: {html.escape(str(type(restored)))}. "
                f"Result: {html.escape(str(restored))}",
                "",
            )
        result = str(result)  # normalise str subclasses (e.g. numpy strings)

        # Parse the result to separate heavy and light chains.
        if '>|<' in result:
            # Both chains present.
            parts = result.split('>|<')
            heavy_part = _strip_chain_tokens(parts[0])
            light_part = _strip_chain_tokens(parts[1])
        elif result.startswith('<') and result.endswith('>'):
            # Only one chain present; attribute it to whichever was supplied.
            single = _strip_chain_tokens(result)
            heavy_part, light_part = (single, "") if heavy else ("", single)
        elif result == '|<>':
            # Empty result - nothing was restored, so echo the inputs back.
            # Both inputs are echoed so a supplied light chain is not dropped.
            heavy_part, light_part = heavy, light
        else:
            return f"Error: Unexpected result format: '{html.escape(result)}'", ""

        # Create highlighted versions.
        highlighted_heavy = highlight_restored_residues(heavy, heavy_part)
        highlighted_light = highlight_restored_residues(light, light_part)

        # Wrap in the styled container (no scroll, wrapped text - see CSS).
        heavy_html = f'<div class="restored-sequence-box">{highlighted_heavy}</div>'
        light_html = f'<div class="restored-sequence-box">{highlighted_light}</div>'
        return heavy_html, light_html
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return f"Error during restoration: {html.escape(str(e))}", ""


def highlight_restored_residues(original_seq, restored_seq):
    """Return ``restored_seq`` as HTML with restored residues highlighted.

    A residue counts as restored when the original has ``*`` at the same index
    and the restored sequence does not. Such residues are wrapped in
    ``<span class="restored-highlight">``; all characters are HTML-escaped.

    NOTE(review): the comparison is purely positional, so if the restored
    sequence has a different length than the input (possibly the case with
    alignment enabled - confirm), highlights may be misplaced.

    Args:
        original_seq (str): Input sequence containing ``*`` masks.
        restored_seq (str): Sequence returned by the model.

    Returns:
        str: HTML fragment; ``restored_seq`` unchanged if it is empty.
    """
    if not restored_seq:
        return restored_seq
    if not original_seq:
        return html.escape(restored_seq)

    pieces = []
    for orig_char, rest_char in zip(original_seq, restored_seq):
        safe_char = html.escape(rest_char)
        if orig_char == '*' and rest_char != '*':
            # This residue was restored.
            pieces.append(f'<span class="restored-highlight">{safe_char}</span>')
        else:
            pieces.append(safe_char)

    # Add any remaining characters from the restored sequence.
    pieces.append(html.escape(restored_seq[len(original_seq):]))
    return "".join(pieces)


CUSTOM_CSS = """
* {
    font-family: 'Courier New', monospace !important;
}
.sequence-input, .sequence-output {
    font-family: 'Courier New', monospace !important;
    font-size: 14px !important;
    letter-spacing: 0.5px !important;
}
.restored-highlight {
    background-color: #90EE90 !important;
    color: #000 !important;
    font-weight: bold !important;
}
.examples {
    font-family: 'Courier New', monospace !important;
    font-size: 14px !important;
    letter-spacing: 0.5px !important;
}
.restored-sequence-box {
    font-family: 'Courier New', monospace !important;
    font-size: 14px !important;
    letter-spacing: 0.5px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
}
.restored-heading {
    color: #2E8B57 !important;
    font-weight: bold !important;
    font-size: 18px !important;
}
.example-text {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
}
.examples-table {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    max-width: none !important;
    overflow: visible !important;
}
.examples-table td {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    max-width: none !important;
    overflow: visible !important;
    text-overflow: unset !important;
}
.sequence-output label {
    font-weight: bold !important;
    color: #495057 !important;
    font-size: 14px !important;
    margin-bottom: 5px !important;
}
/* Force full display of examples */
.examples-container {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
}
.examples-container table {
    width: 100% !important;
    table-layout: auto !important;
}
.examples-container td {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    padding: 8px !important;
    vertical-align: top !important;
}
.examples-container th {
    white-space: nowrap !important;
    padding: 8px !important;
}
/* Override any Gradio default truncation */
.examples table td {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
}
.examples table {
    table-layout: auto !important;
    width: 100% !important;
}
/* Target the specific examples component */
div[data-testid="examples"] table td {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
}
/* Force examples to show full content */
.examples table, .examples table td, .examples table th {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    table-layout: auto !important;
    width: auto !important;
    min-width: 100% !important;
}
/* Override any inline styles */
.examples * {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
}
/* Style output labels to match input labels exactly */
.output-label {
    font-weight: 600 !important;
    color: var(--label-text-color) !important;
    font-size: 14px !important;
    margin-bottom: 8px !important;
    margin-top: 16px !important;
    line-height: 1.4 !important;
    display: block !important;
}
"""

# Markdown needs its line structure for the heading and bullet list to render.
INTRO_MARKDOWN = """
# 🧬 AbLang2 Sequence Restorer

This app uses the AbLang2 model to restore masked residues (*) in antibody sequences. You can provide either one or both heavy and light chain sequences.

**Instructions:**
- Use `*` to mask residues you want to restore
- Provide heavy chain, light chain, or both
- Enable "Use Alignment" for variable missing lengths
"""

NOTE_MARKDOWN = """
---

**Note:** This app uses the AbLang2 model from Hugging Face Hub. The restoration process may take a few seconds depending on sequence length and complexity.
"""

# Create Gradio interface
with gr.Blocks(
    title="AbLang2 Sequence Restorer",
    theme=gr.themes.Soft(),
    css=CUSTOM_CSS,
) as demo:
    gr.Markdown(INTRO_MARKDOWN)

    with gr.Row():
        with gr.Column():
            heavy_input = gr.Textbox(
                label="Heavy Chain Sequence",
                placeholder="Enter heavy chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"],
            )
            light_input = gr.Textbox(
                label="Light Chain Sequence",
                placeholder="Enter light chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"],
            )
            align_checkbox = gr.Checkbox(
                label="Use Alignment (for variable missing lengths)",
                value=False,
            )
            restore_btn = gr.Button("🔄 Restore Sequences", variant="primary")

        with gr.Column():
            gr.Markdown("### 🧬 Restored Sequences", elem_classes=["restored-heading"])
            gr.Markdown("*Green highlighting shows restored residues*")
            gr.Markdown("**Heavy Chain Sequence**", elem_classes=["output-label"])
            heavy_output = gr.HTML(label="")
            gr.Markdown("**Light Chain Sequence**", elem_classes=["output-label"])
            light_output = gr.HTML(label="")

    # Example sequences
    gr.Examples(
        examples=[
            [
                "EVQ***SGGEVKKPGASVKVSCRASGYTFRNYGLTWVRQAPGQGLEWMGWISAYNGNTNYAQKFQGRVTLTTDTSTSTAYMELRSLRSDDTAVYFCAR**PGHGAAFMDVWGTGTTVTVSS",
                "DIQLTQSPLSLPVTLGQPASISCRSS*SLEASDTNIYLSWFQQRPGQSPRRLIYKI*NRDSGVPDRFSGSGSGTHFTLRISRVEADDVAVYYCMQGTHWPPAFGQGTKVDIK",
            ],
            [
                "EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMGWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDY**GMDVWGQGTTVTVSS",
                "",
            ],
            [
                "",
                "DIQLTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIY*ASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTP*TFGQGTKVEIK",
            ],
        ],
        inputs=[heavy_input, light_input],
        label="Example Sequences",
    )

    # Connect the button to the function
    restore_btn.click(
        fn=restore_sequences,
        inputs=[heavy_input, light_input, align_checkbox],
        outputs=[heavy_output, light_output],
    )

    gr.Markdown(NOTE_MARKDOWN)


if __name__ == "__main__":
    print("Starting AbLang2 Sequence Restorer...")
    demo.launch()