import html
import os
import sys

import gradio as gr
from transformers import AutoModel, AutoTokenizer
from transformers.utils import cached_file
|
|
| |
# Hugging Face Hub repository that supplies the model weights, the tokenizer
# and the adapter module. Kept in a single constant so all three are always
# loaded from the same place.
MODEL_REPO_ID = "hemantn/ablang2"

# NOTE: trust_remote_code=True lets transformers execute Python code shipped
# inside the repository. Only point MODEL_REPO_ID at a repository you trust.
model = AutoModel.from_pretrained(MODEL_REPO_ID, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO_ID, trust_remote_code=True)

# Resolve the local path of the repository's adapter.py and put its directory
# at the front of sys.path so that `adapter` can be imported as a module.
adapter_path = cached_file(MODEL_REPO_ID, "adapter.py")
cached_model_dir = os.path.dirname(adapter_path)
sys.path.insert(0, cached_model_dir)

# This import must stay below the sys.path change above; `adapter` is not
# importable before that directory has been added.
from adapter import AbLang2PairedHuggingFaceAdapter  # noqa: E402

# Callable used by restore_sequences() to run the model in 'restore' mode.
ablang = AbLang2PairedHuggingFaceAdapter(model=model, tokenizer=tokenizer)
|
|
def _wrap_sequence_box(inner_html):
    """Wrap already-rendered sequence markup in the styled output container."""
    return f'<div class="restored-sequence-box" style="padding: 10px; background-color: #f8f9fa; border: 1px solid #dee2e6; border-radius: 4px;">{inner_html}</div>'


def restore_sequences(heavy_chain, light_chain, use_align=False):
    """
    Restore masked residues in antibody sequences.

    The function is used as a Gradio click handler bound to exactly two HTML
    outputs, so every code path returns a 2-tuple. On failure the message is
    placed in the first element and the second element is an empty string.

    Args:
        heavy_chain (str): Heavy chain sequence with masked residues (*).
            ``None`` is treated like an empty string.
        light_chain (str): Light chain sequence with masked residues (*).
            ``None`` is treated like an empty string.
        use_align (bool): Whether to use alignment for variable missing lengths

    Returns:
        tuple: (heavy_html, light_html) -- the restored chains rendered as
        HTML with restored residues highlighted, or (message, "") when the
        input is empty, alignment is unavailable, or restoration fails.
    """
    try:
        if use_align:
            try:
                # Imported only to verify the optional dependency is installed.
                import anarci  # noqa: F401
            except ImportError:
                return "Alignment feature requires 'anarci' package which is not available. Please disable alignment option.", ""

        heavy = (heavy_chain or "").strip()
        light = (light_chain or "").strip()
        if not (heavy or light):
            return "Please provide at least one antibody chain sequence.", ""

        # A missing chain is passed as an empty string in its slot.
        restored = ablang([[heavy, light]], mode='restore', align=use_align)

        if not (hasattr(restored, '__len__') and len(restored) > 0):
            return "Error: No restoration result obtained.", ""
        result = restored[0]

        if '>|<' in result:
            # Paired output: "<heavy>|<light>". Split once, then drop the
            # angle-bracket delimiters from each half.
            heavy_part, light_part = (
                part.replace('<', '').replace('>', '')
                for part in result.split('>|<')[:2]
            )
        elif result.startswith('<') and result.endswith('>'):
            # Single-chain output: attribute it to whichever chain was given.
            cleaned = result.replace('<', '').replace('>', '')
            heavy_part, light_part = (cleaned, "") if heavy else ("", cleaned)
        else:
            return "Error: Unexpected result format.", ""

        return (
            _wrap_sequence_box(highlight_restored_residues(heavy, heavy_part)),
            _wrap_sequence_box(highlight_restored_residues(light, light_part)),
        )

    except Exception as e:
        # UI boundary: report the failure in the output panel instead of
        # crashing the handler. The message is rendered as HTML, so escape it.
        return f"Error during restoration: {html.escape(str(e))}", ""
|
|
def highlight_restored_residues(original_seq, restored_seq):
    """
    Highlight restored residues in green.

    Compares the two sequences position by position. Every position where the
    original holds the mask character ``*`` and the restored sequence holds
    something else is wrapped in a ``<span class="restored-highlight">``.
    Characters of ``restored_seq`` beyond the length of ``original_seq`` are
    appended without highlighting. All residue characters are HTML-escaped
    because the result is rendered as HTML.

    NOTE(review): the comparison is purely positional, so if the restored
    sequence differs in length from the input (possibly the case in alignment
    mode -- confirm), highlights after the first length change may be offset.

    Args:
        original_seq (str): Input sequence, with ``*`` marking masked residues.
        restored_seq (str): Sequence returned by the model.

    Returns:
        str: HTML markup for the restored sequence. ``restored_seq`` is
        returned unchanged when it is empty.
    """
    if not restored_seq:
        return restored_seq
    if not original_seq:
        # Nothing to compare against: no highlighting, just safe text.
        return html.escape(restored_seq)

    pieces = []
    for orig_char, rest_char in zip(original_seq, restored_seq):
        safe_char = html.escape(rest_char)
        if orig_char == '*' and rest_char != '*':
            pieces.append(f'<span class="restored-highlight">{safe_char}</span>')
        else:
            pieces.append(safe_char)

    # zip() stops at the shorter input; keep any extra restored residues.
    # The slice is empty when restored_seq is not longer than original_seq.
    pieces.append(html.escape(restored_seq[len(original_seq):]))

    return "".join(pieces)
|
|
| |
# Custom stylesheet for the app: monospace fonts everywhere, green highlight
# for restored residues, and several overlapping rules that stop the examples
# table from truncating long sequences.
APP_CSS = """
* {
    font-family: 'Courier New', monospace !important;
}
.sequence-input, .sequence-output {
    font-family: 'Courier New', monospace !important;
    font-size: 14px !important;
    letter-spacing: 0.5px !important;
}
.restored-highlight {
    background-color: #90EE90 !important;
    color: #000 !important;
    font-weight: bold !important;
}
.examples {
    font-family: 'Courier New', monospace !important;
    font-size: 14px !important;
    letter-spacing: 0.5px !important;
}
.restored-sequence-box {
    font-family: 'Courier New', monospace !important;
    font-size: 14px !important;
    letter-spacing: 0.5px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
}
.restored-heading {
    color: #2E8B57 !important;
    font-weight: bold !important;
    font-size: 18px !important;
}
.example-text {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
}
.examples-table {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    max-width: none !important;
    overflow: visible !important;
}
.examples-table td {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    max-width: none !important;
    overflow: visible !important;
    text-overflow: unset !important;
}
.sequence-output label {
    font-weight: bold !important;
    color: #495057 !important;
    font-size: 14px !important;
    margin-bottom: 5px !important;
}
/* Force full display of examples */
.examples-container {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
}
.examples-container table {
    width: 100% !important;
    table-layout: auto !important;
}
.examples-container td {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    padding: 8px !important;
    vertical-align: top !important;
}
.examples-container th {
    white-space: nowrap !important;
    padding: 8px !important;
}
/* Override any Gradio default truncation */
.examples table td {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
}
.examples table {
    table-layout: auto !important;
    width: 100% !important;
}
/* Target the specific examples component */
div[data-testid="examples"] table td {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
}
/* Force examples to show full content */
.examples table, .examples table td, .examples table th {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    table-layout: auto !important;
    width: auto !important;
    min-width: 100% !important;
}
/* Override any inline styles */
.examples * {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
}
/* Style output labels to match input labels exactly */
.output-label {
    font-weight: 600 !important;
    color: var(--label-text-color) !important;
    font-size: 14px !important;
    margin-bottom: 8px !important;
    margin-top: 16px !important;
    line-height: 1.4 !important;
    display: block !important;
}
"""

# Rows offered in the examples table, each as [heavy chain, light chain]:
# a paired example, a heavy-only example and a light-only example.
EXAMPLE_SEQUENCES = [
    [
        "EVQ***SGGEVKKPGASVKVSCRASGYTFRNYGLTWVRQAPGQGLEWMGWISAYNGNTNYAQKFQGRVTLTTDTSTSTAYMELRSLRSDDTAVYFCAR**PGHGAAFMDVWGTGTTVTVSS",
        "DIQLTQSPLSLPVTLGQPASISCRSS*SLEASDTNIYLSWFQQRPGQSPRRLIYKI*NRDSGVPDRFSGSGSGTHFTLRISRVEADDVAVYYCMQGTHWPPAFGQGTKVDIK",
    ],
    [
        "EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMGWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDY**GMDVWGQGTTVTVSS",
        "",
    ],
    [
        "",
        "DIQLTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIY*ASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTP*TFGQGTKVEIK",
    ],
]

# Build the interface: inputs in the left column, rendered results in the
# right column, followed by the examples table and a footer note.
with gr.Blocks(
    title="AbLang2 Sequence Restorer",
    theme=gr.themes.Soft(),
    css=APP_CSS,
) as demo:
    gr.Markdown("""
    # 🧬 AbLang2 Sequence Restorer

    This app uses the AbLang2 model to restore masked residues (*) in antibody sequences.
    You can provide either one or both heavy and light chain sequences.

    **Instructions:**
    - Use `*` to mask residues you want to restore
    - Provide heavy chain, light chain, or both
    - Enable "Use Alignment" for variable missing lengths
    """)

    with gr.Row():
        # Left column: sequence inputs, alignment toggle and action button.
        with gr.Column():
            heavy_input = gr.Textbox(
                label="Heavy Chain Sequence",
                placeholder="Enter heavy chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"],
            )
            light_input = gr.Textbox(
                label="Light Chain Sequence",
                placeholder="Enter light chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"],
            )
            align_checkbox = gr.Checkbox(
                label="Use Alignment (for variable missing lengths) - Requires anarci package",
                value=False,
            )
            restore_btn = gr.Button("🔄 Restore Sequences", variant="primary")

        # Right column: headings plus one HTML panel per chain.
        with gr.Column():
            gr.Markdown("### 🧬 Restored Sequences", elem_classes=["restored-heading"])
            gr.Markdown("*Green highlighting shows restored residues*")

            gr.Markdown("**Heavy Chain Sequence**", elem_classes=["output-label"])
            heavy_output = gr.HTML(label="")

            gr.Markdown("**Light Chain Sequence**", elem_classes=["output-label"])
            light_output = gr.HTML(label="")

    # Clicking an example row fills the two input textboxes.
    gr.Examples(
        examples=EXAMPLE_SEQUENCES,
        inputs=[heavy_input, light_input],
        label="Example Sequences",
    )

    # Run the restoration when the button is pressed.
    restore_btn.click(
        fn=restore_sequences,
        inputs=[heavy_input, light_input, align_checkbox],
        outputs=[heavy_output, light_output],
    )

    gr.Markdown("""
    ---
    **Note:** This app uses the AbLang2 model from Hugging Face Hub.
    The restoration process may take a few seconds depending on sequence length and complexity.
    """)


# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()