# Page header captured together with the source: "Spaces: Sleeping" (not part of the program).
import html
import os
import sys

import gradio as gr
from transformers import AutoModel, AutoTokenizer
from transformers.utils import cached_file
# Hugging Face Hub repository id used for the model, the tokenizer and the
# adapter module. Kept in one place so the three lookups cannot drift apart.
MODEL_REPO_ID = "hemantn/ablang2"

# Load model and tokenizer from Hugging Face Hub.
# NOTE(review): trust_remote_code=True runs Python code shipped in the Hub
# repository; consider pinning a `revision` so the executed code cannot change
# silently.
model = AutoModel.from_pretrained(MODEL_REPO_ID, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO_ID, trust_remote_code=True)

# Find the cached model directory and make it importable: adapter.py lives
# next to the downloaded model files, not in this project.
adapter_path = cached_file(MODEL_REPO_ID, "adapter.py")
cached_model_dir = os.path.dirname(adapter_path)
sys.path.insert(0, cached_model_dir)

# Import and create the adapter. The import must stay below the sys.path
# change above, otherwise the module cannot be found.
from adapter import AbLang2PairedHuggingFaceAdapter  # noqa: E402

ablang = AbLang2PairedHuggingFaceAdapter(model=model, tokenizer=tokenizer)
def _error_outputs(message):
    """Return the (heavy, light) output pair that shows *message* to the user."""
    # The first output is rendered as raw HTML, so the text is escaped.
    return html.escape(message), ""


def _strip_chain_markers(chain):
    """Remove the '<' and '>' delimiters that wrap a chain in a restore result."""
    return chain.replace('<', '').replace('>', '')


def _split_restored_chains(result, heavy_given):
    """Split one restore result into (heavy, light); return None if unparseable."""
    if '|' in result:
        # Paired form '<heavy>|<light>'. A one-sided form such as '<heavy>|'
        # or '|<light>' is accepted as well.
        # NOTE(review): the one-sided forms are assumed from the upstream
        # AbLang2 examples; confirm against the adapter's actual output.
        heavy_raw, _, light_raw = result.partition('|')
        return _strip_chain_markers(heavy_raw), _strip_chain_markers(light_raw)
    if result.startswith('<') and result.endswith('>'):
        # A single wrapped chain: attribute it to whichever chain was submitted.
        chain = _strip_chain_markers(result)
        return (chain, "") if heavy_given else ("", chain)
    return None


def _sequence_box(highlighted):
    """Wrap already-highlighted sequence HTML in the styled output box."""
    return (
        '<div class="restored-sequence-box" style="padding: 10px; '
        'background-color: #f8f9fa; border: 1px solid #dee2e6; '
        f'border-radius: 4px;">{highlighted}</div>'
    )


def restore_sequences(heavy_chain, light_chain, use_align=False):
    """
    Restore masked residues in antibody sequences.

    Args:
        heavy_chain (str): Heavy chain sequence with masked residues (*)
        light_chain (str): Light chain sequence with masked residues (*)
        use_align (bool): Whether to use alignment for variable missing lengths

    Returns:
        tuple: (heavy_html, light_html). On success both items are HTML boxes
        containing the restored chain with restored residues highlighted. On
        failure the first item is an error message and the second is empty.
        The tuple always has exactly two items because the click handler is
        bound to exactly two output components.
    """
    heavy = (heavy_chain or "").strip()
    light = (light_chain or "").strip()
    if not heavy and not light:
        return _error_outputs("Please provide at least one antibody chain sequence.")

    try:
        # A missing chain is passed as an empty string in its slot.
        restored = ablang([[heavy, light]], mode='restore', align=use_align)
        if not hasattr(restored, '__len__') or len(restored) == 0:
            return _error_outputs("Error: No restoration result obtained.")

        # One input pair was submitted, so only the first result is relevant.
        chains = _split_restored_chains(str(restored[0]), heavy_given=bool(heavy))
        if chains is None:
            return _error_outputs("Error: Unexpected result format.")

        heavy_part, light_part = chains
        return (
            _sequence_box(highlight_restored_residues(heavy, heavy_part)),
            _sequence_box(highlight_restored_residues(light, light_part)),
        )
    except Exception as e:
        # UI boundary: show the failure in the page instead of raising.
        return _error_outputs(f"Error during restoration: {e}")
def highlight_restored_residues(original_seq, restored_seq):
    """
    Highlight restored residues in green.

    The two sequences are compared position by position: a residue is wrapped
    in a ``restored-highlight`` span when the original has ``*`` at that index
    and the restored sequence has something else. Residues are HTML-escaped
    because the result is embedded in an HTML component.

    Args:
        original_seq (str): Sequence as entered, with ``*`` for masked residues.
        restored_seq (str): Sequence returned by the restoration step.

    Returns:
        str: HTML for the restored sequence. An empty/None ``restored_seq`` is
        returned unchanged.

    Note:
        The comparison is purely positional. If the restored sequence has a
        different length than the original, highlights after the first length
        change may not line up with the masked positions.
    """
    if not restored_seq:
        return restored_seq
    original_seq = original_seq or ""

    pieces = []
    for orig_char, rest_char in zip(original_seq, restored_seq):
        safe_char = html.escape(rest_char)
        if orig_char == '*' and rest_char != '*':
            # This residue was restored
            pieces.append(f'<span class="restored-highlight">{safe_char}</span>')
        else:
            pieces.append(safe_char)

    # Add any remaining characters from the restored sequence (the slice is
    # empty when the restored sequence is not longer than the original).
    pieces.append(html.escape(restored_seq[len(original_seq):]))
    return "".join(pieces)
# Stylesheet passed to gr.Blocks. The text is kept exactly as in the original
# inline literal; only its location changed.
CUSTOM_CSS = """
* {
font-family: 'Courier New', monospace !important;
}
.sequence-input, .sequence-output {
font-family: 'Courier New', monospace !important;
font-size: 14px !important;
letter-spacing: 0.5px !important;
}
.restored-highlight {
background-color: #90EE90 !important;
color: #000 !important;
font-weight: bold !important;
}
.examples {
font-family: 'Courier New', monospace !important;
font-size: 14px !important;
letter-spacing: 0.5px !important;
}
.restored-sequence-box {
font-family: 'Courier New', monospace !important;
font-size: 14px !important;
letter-spacing: 0.5px !important;
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
}
.restored-heading {
color: #2E8B57 !important;
font-weight: bold !important;
font-size: 18px !important;
}
.example-text {
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
white-space: pre-wrap !important;
word-wrap: break-word !important;
}
.examples-table {
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
white-space: pre-wrap !important;
word-wrap: break-word !important;
max-width: none !important;
overflow: visible !important;
}
.examples-table td {
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
white-space: pre-wrap !important;
word-wrap: break-word !important;
max-width: none !important;
overflow: visible !important;
text-overflow: unset !important;
}
.sequence-output label {
font-weight: bold !important;
color: #495057 !important;
font-size: 14px !important;
margin-bottom: 5px !important;
}
/* Force full display of examples */
.examples-container {
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
}
.examples-container table {
width: 100% !important;
table-layout: auto !important;
}
.examples-container td {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: none !important;
text-overflow: unset !important;
padding: 8px !important;
vertical-align: top !important;
}
.examples-container th {
white-space: nowrap !important;
padding: 8px !important;
}
/* Override any Gradio default truncation */
.examples table td {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: none !important;
text-overflow: unset !important;
overflow: visible !important;
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
}
.examples table {
table-layout: auto !important;
width: 100% !important;
}
/* Target the specific examples component */
div[data-testid="examples"] table td {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: none !important;
text-overflow: unset !important;
overflow: visible !important;
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
}
/* Force examples to show full content */
.examples table, .examples table td, .examples table th {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: none !important;
text-overflow: unset !important;
overflow: visible !important;
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
table-layout: auto !important;
width: auto !important;
min-width: 100% !important;
}
/* Override any inline styles */
.examples * {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: none !important;
text-overflow: unset !important;
overflow: visible !important;
}
/* Style output labels to match input labels exactly */
.output-label {
font-weight: 600 !important;
color: var(--label-text-color) !important;
font-size: 14px !important;
margin-bottom: 8px !important;
margin-top: 16px !important;
line-height: 1.4 !important;
display: block !important;
}
"""

# Introductory text shown at the top of the page.
INTRO_MARKDOWN = """
# 🧬 AbLang2 Sequence Restorer
This app uses the AbLang2 model to restore masked residues (*) in antibody sequences.
You can provide either one or both heavy and light chain sequences.
**Instructions:**
- Use `*` to mask residues you want to restore
- Provide heavy chain, light chain, or both
- Enable "Use Alignment" for variable missing lengths
"""

# Footer note shown at the bottom of the page.
FOOTER_MARKDOWN = """
---
**Note:** This app uses the AbLang2 model from Hugging Face Hub.
The restoration process may take a few seconds depending on sequence length and complexity.
"""

# Create Gradio interface
# NOTE(review): the nesting below was reconstructed from statement order
# (inputs and button in the first column, outputs in the second); confirm it
# against the deployed layout.
with gr.Blocks(
    title="AbLang2 Sequence Restorer",
    theme=gr.themes.Soft(),
    css=CUSTOM_CSS,
) as demo:
    gr.Markdown(INTRO_MARKDOWN)

    with gr.Row():
        # Input side: the two chains, the alignment switch and the trigger.
        with gr.Column():
            heavy_input = gr.Textbox(
                label="Heavy Chain Sequence",
                placeholder="Enter heavy chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"]
            )
            light_input = gr.Textbox(
                label="Light Chain Sequence",
                placeholder="Enter light chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"]
            )
            align_checkbox = gr.Checkbox(
                label="Use Alignment (for variable missing lengths)",
                value=False
            )
            restore_btn = gr.Button("🔄 Restore Sequences", variant="primary")

        # Output side: one HTML component per chain. The headings are separate
        # Markdown components because the HTML components carry empty labels.
        with gr.Column():
            gr.Markdown("### 🧬 Restored Sequences", elem_classes=["restored-heading"])
            gr.Markdown("*Green highlighting shows restored residues*")
            gr.Markdown("**Heavy Chain Sequence**", elem_classes=["output-label"])
            heavy_output = gr.HTML(label="")
            gr.Markdown("**Light Chain Sequence**", elem_classes=["output-label"])
            light_output = gr.HTML(label="")

    # Example sequences: a paired example, a heavy-only one and a light-only one.
    gr.Examples(
        examples=[
            [
                "EVQ***SGGEVKKPGASVKVSCRASGYTFRNYGLTWVRQAPGQGLEWMGWISAYNGNTNYAQKFQGRVTLTTDTSTSTAYMELRSLRSDDTAVYFCAR**PGHGAAFMDVWGTGTTVTVSS",
                "DIQLTQSPLSLPVTLGQPASISCRSS*SLEASDTNIYLSWFQQRPGQSPRRLIYKI*NRDSGVPDRFSGSGSGTHFTLRISRVEADDVAVYYCMQGTHWPPAFGQGTKVDIK"
            ],
            [
                "EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMGWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDY**GMDVWGQGTTVTVSS",
                ""
            ],
            [
                "",
                "DIQLTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIY*ASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTP*TFGQGTKVEIK"
            ]
        ],
        inputs=[heavy_input, light_input],
        label="Example Sequences"
    )

    # Connect the button to the function. Two output components are bound, so
    # the handler has to return exactly two values.
    restore_btn.click(
        fn=restore_sequences,
        inputs=[heavy_input, light_input, align_checkbox],
        outputs=[heavy_output, light_output]
    )

    gr.Markdown(FOOTER_MARKDOWN)
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()