Spaces:

hemantn
/

ablang2_seq_restore

Build error

App Files Files Community

hemantn committed on Jul 27, 2025

Commit

91a080f

1 Parent(s): 7fdf708

Add Docker support with anarci installation

Browse files

Files changed (5) hide show

.dockerignore +26 -0
Dockerfile +34 -0
app_backup.py +330 -0
requirements.txt +0 -1
setup.py +16 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,26 @@

+.git
+.gitignore
+README.md
+README_Spaces.md
+app_backup.py
+setup.py
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+.Python
+env
+pip-log.txt
+pip-delete-this-directory.txt
+.tox
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.log
+.git
+.mypy_cache
+.pytest_cache
+.hypothesis

Dockerfile ADDED Viewed

	@@ -0,0 +1,34 @@

FROM python:3.10-slim

# Install system dependencies.
# hmmer: ANARCI's README lists HMMER 3.1+ as a requirement (its install step
# builds HMM profiles). NOTE(review): presumed cause of the Space's
# "Build error" -- confirm against the build log.
RUN apt-get update && apt-get install -y \
    git \
    build-essential \
    wget \
    curl \
    bash \
    hmmer \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy requirements first so the dependency layer is cached independently of
# application-code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install ANARCI from GitHub. Biopython is listed as an ANARCI requirement, so
# it is installed explicitly beforehand; --no-cache-dir keeps the layer small,
# matching the requirements step above.
RUN pip install --no-cache-dir biopython \
    && pip install --no-cache-dir git+https://github.com/oxpig/ANARCI.git

# Copy application files
COPY app.py .
COPY adapter.py .

# Expose port for Gradio
EXPOSE 7860

# Make Gradio listen on all interfaces on the exposed port
ENV GRADIO_SERVER_NAME=0.0.0.0
ENV GRADIO_SERVER_PORT=7860

# Run the application
CMD ["python", "app.py"]

app_backup.py ADDED Viewed

	@@ -0,0 +1,330 @@

+import gradio as gr
+import sys
+import os
+from transformers import AutoModel, AutoTokenizer
+from transformers.utils import cached_file
# Hub repository that hosts the model weights, tokenizer and adapter module.
_HUB_REPO = "hemantn/ablang2"
_LOAD_KWARGS = {"trust_remote_code": True}

# Fetch the model and its tokenizer from the Hugging Face Hub.
model = AutoModel.from_pretrained(_HUB_REPO, **_LOAD_KWARGS)
tokenizer = AutoTokenizer.from_pretrained(_HUB_REPO, **_LOAD_KWARGS)

# adapter.py ships inside the model repo: resolve its cached location and put
# that directory at the front of sys.path so it can be imported as a module.
adapter_path = cached_file(_HUB_REPO, "adapter.py")
cached_model_dir = os.path.dirname(adapter_path)
sys.path.insert(0, cached_model_dir)

# This import must come after the sys.path change above.
from adapter import AbLang2PairedHuggingFaceAdapter

# Callable wrapper used by the app to run the model.
ablang = AbLang2PairedHuggingFaceAdapter(model=model, tokenizer=tokenizer)
def restore_sequences(heavy_chain, light_chain, use_align=False):
    """
    Restore masked residues in antibody sequences.

    Every code path returns exactly two values, because the function is wired
    to two Gradio outputs (heavy and light HTML components). Error messages
    are returned in the first slot with an empty string in the second.

    Args:
        heavy_chain (str): Heavy chain sequence with masked residues (*).
            May be empty or None when only the light chain is supplied.
        light_chain (str): Light chain sequence with masked residues (*).
            May be empty or None when only the heavy chain is supplied.
        use_align (bool): Whether to use alignment for variable missing lengths
    Returns:
        tuple: (heavy_html, light_html) on success, or (error_message, "")
    """
    # Local import keeps the block self-contained; only used for escaping
    # exception text that ends up inside an HTML component.
    import html

    heavy_in = (heavy_chain or "").strip()
    light_in = (light_chain or "").strip()
    if not heavy_in and not light_in:
        return "Please provide at least one antibody chain sequence.", ""

    def _strip_markers(chain):
        # The adapter wraps each chain in '<' ... '>' delimiters.
        return chain.replace('<', '').replace('>', '')

    def _box(inner_html):
        return f'<div class="restored-sequence-box" style="padding: 10px; background-color: #f8f9fa; border: 1px solid #dee2e6; border-radius: 4px;">{inner_html}</div>'

    try:
        # A missing chain is passed as an empty string in its slot.
        restored = ablang([[heavy_in, light_in]], mode='restore', align=use_align)
        if not (hasattr(restored, '__len__') and len(restored) > 0):
            return "Error: No restoration result obtained.", ""

        result = restored[0]  # single input pair -> single result
        if '>|<' in result:
            # Both chains present, separated by '>|<'
            heavy_raw, light_raw = result.split('>|<')[:2]
        elif result.startswith('<') and result.endswith('>'):
            # Only one chain present: attribute it to whichever was supplied
            heavy_raw, light_raw = (result, "") if heavy_in else ("", result)
        else:
            return "Error: Unexpected result format.", ""

        highlighted_heavy = highlight_restored_residues(heavy_in, _strip_markers(heavy_raw))
        highlighted_light = highlight_restored_residues(light_in, _strip_markers(light_raw))
        return _box(highlighted_heavy), _box(highlighted_light)
    except Exception as e:
        # Top-level UI boundary: surface the failure to the user instead of
        # crashing the request. The message is escaped because it is rendered
        # as HTML.
        return f"Error during restoration: {html.escape(str(e))}", ""
def highlight_restored_residues(original_seq, restored_seq):
    """
    Highlight restored residues in green.

    Compares the two sequences position by position. A residue is wrapped in
    a ``<span class="restored-highlight">`` when the original had a mask
    (``*``) at that position and the restored sequence has a non-mask
    character. Restored characters beyond the length of the original are
    appended without highlighting. All sequence characters are HTML-escaped
    because the result is rendered as HTML.

    Args:
        original_seq (str): Input sequence, with ``*`` marking masked residues.
        restored_seq (str): Sequence returned by the model.
    Returns:
        str: HTML fragment. ``restored_seq`` is returned as-is when it is
        empty/None, and escaped but unhighlighted when ``original_seq`` is
        empty.
    """
    import html  # local import keeps the block self-contained

    if not restored_seq:
        return restored_seq
    if not original_seq:
        return html.escape(restored_seq)

    pieces = []
    for orig_char, rest_char in zip(original_seq, restored_seq):
        safe_char = html.escape(rest_char)
        if orig_char == '*' and rest_char != '*':
            # This residue was restored
            pieces.append(f'<span class="restored-highlight">{safe_char}</span>')
        else:
            pieces.append(safe_char)
    # Tail of the restored sequence with no counterpart in the original
    # (the slice is empty when the restored sequence is not longer).
    pieces.append(html.escape(restored_seq[len(original_seq):]))
    return "".join(pieces)
# Stylesheet for the Gradio page: monospace sequences, green highlight for
# restored residues, and overrides that stop the examples table truncating.
CUSTOM_CSS = """
    * {
        font-family: 'Courier New', monospace !important;
    }
    .sequence-input, .sequence-output {
        font-family: 'Courier New', monospace !important;
        font-size: 14px !important;
        letter-spacing: 0.5px !important;
    }
    .restored-highlight {
        background-color: #90EE90 !important;
        color: #000 !important;
        font-weight: bold !important;
    }
    .examples {
        font-family: 'Courier New', monospace !important;
        font-size: 14px !important;
        letter-spacing: 0.5px !important;
    }
    .restored-sequence-box {
        font-family: 'Courier New', monospace !important;
        font-size: 14px !important;
        letter-spacing: 0.5px !important;
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
        overflow-wrap: break-word !important;
    }
    .restored-heading {
        color: #2E8B57 !important;
        font-weight: bold !important;
        font-size: 18px !important;
    }
    .example-text {
        font-family: 'Courier New', monospace !important;
        font-size: 12px !important;
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
    }
    .examples-table {
        font-family: 'Courier New', monospace !important;
        font-size: 12px !important;
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
        max-width: none !important;
        overflow: visible !important;
    }
    .examples-table td {
        font-family: 'Courier New', monospace !important;
        font-size: 12px !important;
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
        max-width: none !important;
        overflow: visible !important;
        text-overflow: unset !important;
    }
    .sequence-output label {
        font-weight: bold !important;
        color: #495057 !important;
        font-size: 14px !important;
        margin-bottom: 5px !important;
    }
    /* Force full display of examples */
    .examples-container {
        font-family: 'Courier New', monospace !important;
        font-size: 12px !important;
    }
    .examples-container table {
        width: 100% !important;
        table-layout: auto !important;
    }
    .examples-container td {
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
        overflow-wrap: break-word !important;
        max-width: none !important;
        text-overflow: unset !important;
        padding: 8px !important;
        vertical-align: top !important;
    }
    .examples-container th {
        white-space: nowrap !important;
        padding: 8px !important;
    }
    /* Override any Gradio default truncation */
    .examples table td {
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
        overflow-wrap: break-word !important;
        max-width: none !important;
        text-overflow: unset !important;
        overflow: visible !important;
        font-family: 'Courier New', monospace !important;
        font-size: 12px !important;
    }
    .examples table {
        table-layout: auto !important;
        width: 100% !important;
    }
    /* Target the specific examples component */
    div[data-testid="examples"] table td {
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
        overflow-wrap: break-word !important;
        max-width: none !important;
        text-overflow: unset !important;
        overflow: visible !important;
        font-family: 'Courier New', monospace !important;
        font-size: 12px !important;
    }
    /* Force examples to show full content */
    .examples table, .examples table td, .examples table th {
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
        overflow-wrap: break-word !important;
        max-width: none !important;
        text-overflow: unset !important;
        overflow: visible !important;
        font-family: 'Courier New', monospace !important;
        font-size: 12px !important;
        table-layout: auto !important;
        width: auto !important;
        min-width: 100% !important;
    }
    /* Override any inline styles */
    .examples * {
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
        overflow-wrap: break-word !important;
        max-width: none !important;
        text-overflow: unset !important;
        overflow: visible !important;
    }
    /* Style output labels to match input labels exactly */
    .output-label {
        font-weight: 600 !important;
        color: var(--label-text-color) !important;
        font-size: 14px !important;
        margin-bottom: 8px !important;
        margin-top: 16px !important;
        line-height: 1.4 !important;
        display: block !important;
    }
"""

# Example inputs as [heavy, light] pairs: both chains, heavy only, light only.
EXAMPLE_SEQUENCES = [
    [
        "EVQ***SGGEVKKPGASVKVSCRASGYTFRNYGLTWVRQAPGQGLEWMGWISAYNGNTNYAQKFQGRVTLTTDTSTSTAYMELRSLRSDDTAVYFCAR**PGHGAAFMDVWGTGTTVTVSS",
        "DIQLTQSPLSLPVTLGQPASISCRSS*SLEASDTNIYLSWFQQRPGQSPRRLIYKI*NRDSGVPDRFSGSGSGTHFTLRISRVEADDVAVYYCMQGTHWPPAFGQGTKVDIK",
    ],
    [
        "EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMGWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDY**GMDVWGQGTTVTVSS",
        "",
    ],
    [
        "",
        "DIQLTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIY*ASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTP*TFGQGTKVEIK",
    ],
]

# Build the page: inputs on the left, restored sequences on the right.
with gr.Blocks(
    title="AbLang2 Sequence Restorer",
    theme=gr.themes.Soft(),
    css=CUSTOM_CSS,
) as demo:
    gr.Markdown("""
    # 🧬 AbLang2 Sequence Restorer
    This app uses the AbLang2 model to restore masked residues (*) in antibody sequences.
    You can provide either one or both heavy and light chain sequences.
    **Instructions:**
    - Use `*` to mask residues you want to restore
    - Provide heavy chain, light chain, or both
    - Enable "Use Alignment" for variable missing lengths
    """)

    with gr.Row():
        # Left column: user inputs and the trigger button.
        with gr.Column():
            heavy_input = gr.Textbox(
                label="Heavy Chain Sequence",
                placeholder="Enter heavy chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"],
            )
            light_input = gr.Textbox(
                label="Light Chain Sequence",
                placeholder="Enter light chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"],
            )
            align_checkbox = gr.Checkbox(
                label="Use Alignment (for variable missing lengths)",
                value=False,
            )
            restore_btn = gr.Button("🔄 Restore Sequences", variant="primary")

        # Right column: one HTML panel per chain, each under a Markdown label.
        with gr.Column():
            gr.Markdown("### 🧬 Restored Sequences", elem_classes=["restored-heading"])
            gr.Markdown("*Green highlighting shows restored residues*")
            gr.Markdown("**Heavy Chain Sequence**", elem_classes=["output-label"])
            heavy_output = gr.HTML(label="")
            gr.Markdown("**Light Chain Sequence**", elem_classes=["output-label"])
            light_output = gr.HTML(label="")

    # Clickable examples that populate the two input boxes.
    gr.Examples(
        examples=EXAMPLE_SEQUENCES,
        inputs=[heavy_input, light_input],
        label="Example Sequences",
    )

    # Run the restoration when the button is pressed; the handler's two
    # return values fill the heavy and light panels in that order.
    restore_btn.click(
        fn=restore_sequences,
        inputs=[heavy_input, light_input, align_checkbox],
        outputs=[heavy_output, light_output],
    )

    gr.Markdown("""
    ---
    **Note:** This app uses the AbLang2 model from Hugging Face Hub.
    The restoration process may take a few seconds depending on sequence length and complexity.
    """)

if __name__ == "__main__":
    demo.launch()

requirements.txt CHANGED Viewed

@@ -3,4 +3,3 @@ transformers>=4.30.0
 torch>=2.0.0
 numpy>=1.21.0
 pandas>=1.3.0
-git+https://github.com/oxpig/ANARCI.git

 torch>=2.0.0
 numpy>=1.21.0
 pandas>=1.3.0

setup.py ADDED Viewed

	@@ -0,0 +1,16 @@

from setuptools import setup

# Packaging metadata for the AbLang2 sequence-restoration app.
setup(
    name="ablang2-restore",
    version="1.0.0",
    install_requires=[
        "gradio>=4.0.0",
        "transformers>=4.30.0",
        "torch>=2.0.0",
        "numpy>=1.21.0",
        "pandas>=1.3.0",
        # ANARCI is installed straight from GitHub via a PEP 508 direct
        # reference. This replaces the former `dependency_links` entry, which
        # current pip releases ignore, so ANARCI was never actually installed.
        "anarci @ git+https://github.com/oxpig/ANARCI.git",
    ],
)