hemantn committed on
Commit
91a080f
·
1 Parent(s): 7fdf708

Add Docker support with anarci installation

Browse files
Files changed (5) hide show
  1. .dockerignore +26 -0
  2. Dockerfile +34 -0
  3. app_backup.py +330 -0
  4. requirements.txt +0 -1
  5. setup.py +16 -0
.dockerignore ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .git
2
+ .gitignore
3
+ README.md
4
+ README_Spaces.md
5
+ app_backup.py
6
+ setup.py
7
+ __pycache__
8
+ *.pyc
9
+ *.pyo
10
+ *.pyd
11
+ .Python
12
+ env
13
+ pip-log.txt
14
+ pip-delete-this-directory.txt
15
+ .tox
16
+ .coverage
17
+ .coverage.*
18
+ .cache
19
+ nosetests.xml
20
+ coverage.xml
21
+ *.cover
22
+ *.log
23
+ .git
24
+ .mypy_cache
25
+ .pytest_cache
26
+ .hypothesis
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Install system dependencies.
# --no-install-recommends keeps the image small by skipping optional packages.
# NOTE(review): `hmmer` is added because the ANARCI README lists HMMER3 as a
# requirement (its HMMs are built at install time and hmmscan is used at run
# time) -- confirm against the ANARCI version being installed.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    build-essential \
    wget \
    curl \
    bash \
    hmmer \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy requirements first so the dependency layer is cached independently of
# application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install ANARCI from GitHub (it was removed from requirements.txt in this
# commit). --no-cache-dir matches the install above and avoids baking pip's
# cache into the layer.
RUN pip install --no-cache-dir git+https://github.com/oxpig/ANARCI.git

# Copy application files
COPY app.py .
COPY adapter.py .

# Expose port for Gradio
EXPOSE 7860

# Make Gradio listen on all interfaces on the exposed port
ENV GRADIO_SERVER_NAME=0.0.0.0
ENV GRADIO_SERVER_PORT=7860

# Run the application
CMD ["python", "app.py"]
app_backup.py ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import sys
3
+ import os
4
+ from transformers import AutoModel, AutoTokenizer
5
+ from transformers.utils import cached_file
6
+
7
# Single source of truth for the Hugging Face Hub repository that provides the
# model weights, the tokenizer and the adapter module.
MODEL_REPO_ID = "hemantn/ablang2"

# Load model and tokenizer from Hugging Face Hub.
# NOTE(review): trust_remote_code=True lets transformers run Python code
# shipped in the repository -- consider pinning a `revision` if that matters.
model = AutoModel.from_pretrained(MODEL_REPO_ID, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO_ID, trust_remote_code=True)

# Resolve adapter.py from the same repository and put its directory at the
# front of sys.path so the `adapter` import below finds that copy first.
adapter_path = cached_file(MODEL_REPO_ID, "adapter.py")
cached_model_dir = os.path.dirname(adapter_path)
sys.path.insert(0, cached_model_dir)

# Import and create the adapter. The import must stay below the sys.path
# tweak above, which is why it is not at the top of the file.
from adapter import AbLang2PairedHuggingFaceAdapter
ablang = AbLang2PairedHuggingFaceAdapter(model=model, tokenizer=tokenizer)
19
+
20
def restore_sequences(heavy_chain, light_chain, use_align=False):
    """
    Restore masked residues in antibody sequences.

    Every return path yields exactly two values, because the click handler
    that uses this function is wired to exactly two output components.

    Args:
        heavy_chain (str): Heavy chain sequence with masked residues (*).
            May be empty (or None) when only the light chain is supplied.
        light_chain (str): Light chain sequence with masked residues (*).
            May be empty (or None) when only the heavy chain is supplied.
        use_align (bool): Forwarded to the adapter as ``align``.

    Returns:
        tuple: (heavy_html, light_html). On success both are HTML snippets
        with restored residues highlighted. On failure the first element is
        a human-readable message and the second is an empty string.
    """
    try:
        # Treat None like an empty textbox and ignore surrounding whitespace.
        heavy = (heavy_chain or "").strip()
        light = (light_chain or "").strip()
        if not heavy and not light:
            return "Please provide at least one antibody chain sequence.", ""

        # One [heavy, light] pair; a missing chain is passed as "".
        restored = ablang([[heavy, light]], mode='restore', align=use_align)

        if not (hasattr(restored, '__len__') and len(restored) > 0):
            return "Error: No restoration result obtained.", ""

        result = restored[0]  # Only one pair was submitted.

        # Table for deleting the '<' and '>' markers around each chain.
        drop_brackets = str.maketrans('', '', '<>')

        if '>|<' in result:
            # Both chains present, separated by '>|<'.
            parts = result.split('>|<')
            heavy_part = parts[0].translate(drop_brackets)
            light_part = parts[1].translate(drop_brackets)
        elif result.startswith('<') and result.endswith('>'):
            # Single chain: attribute it to whichever input was provided.
            single = result.translate(drop_brackets)
            heavy_part, light_part = (single, "") if heavy else ("", single)
        else:
            return "Error: Unexpected result format.", ""

        # Mark the residues that replaced a '*' in the user's input.
        highlighted_heavy = highlight_restored_residues(heavy, heavy_part)
        highlighted_light = highlight_restored_residues(light, light_part)

        # Wrap each chain in a styled box (text wraps instead of scrolling).
        heavy_html = f'<div class="restored-sequence-box" style="padding: 10px; background-color: #f8f9fa; border: 1px solid #dee2e6; border-radius: 4px;">{highlighted_heavy}</div>'
        light_html = f'<div class="restored-sequence-box" style="padding: 10px; background-color: #f8f9fa; border: 1px solid #dee2e6; border-radius: 4px;">{highlighted_light}</div>'

        return heavy_html, light_html

    except Exception as e:
        # UI boundary: report the failure in the output box instead of
        # letting the exception escape to the caller.
        return f"Error during restoration: {str(e)}", ""
83
+
84
def highlight_restored_residues(original_seq, restored_seq):
    """
    Highlight restored residues in green.

    A residue counts as restored when the original sequence has ``*`` at that
    position and the restored sequence has anything else. Such residues are
    wrapped in ``<span class="restored-highlight">``; all other characters of
    the restored sequence are copied through unchanged.

    Args:
        original_seq (str): The user-supplied sequence, with ``*`` masks.
        restored_seq (str): The sequence returned by the restoration step.

    Returns:
        str: ``restored_seq`` with restored residues wrapped in highlight
        spans. If either argument is empty, ``restored_seq`` is returned
        as-is.
    """
    if not original_seq or not restored_seq:
        return restored_seq

    # NOTE(review): the comparison is purely positional, so if restoration
    # changes the sequence length the highlights may be offset -- confirm
    # this is acceptable when alignment is enabled.
    pieces = [
        f'<span class="restored-highlight">{rest_char}</span>'
        if orig_char == '*' and rest_char != '*'
        else rest_char
        for orig_char, rest_char in zip(original_seq, restored_seq)
    ]

    # Keep any tail of the restored sequence beyond the original's length
    # (the slice is empty when there is none).
    pieces.append(restored_seq[len(original_seq):])

    return "".join(pieces)
104
+
105
+ # Create Gradio interface
106
# Stylesheet passed to gr.Blocks: monospace fonts everywhere, green
# highlighting for restored residues, and rules that stop the examples table
# from truncating long sequences.
CUSTOM_CSS = """
* {
    font-family: 'Courier New', monospace !important;
}
.sequence-input, .sequence-output {
    font-family: 'Courier New', monospace !important;
    font-size: 14px !important;
    letter-spacing: 0.5px !important;
}
.restored-highlight {
    background-color: #90EE90 !important;
    color: #000 !important;
    font-weight: bold !important;
}
.examples {
    font-family: 'Courier New', monospace !important;
    font-size: 14px !important;
    letter-spacing: 0.5px !important;
}
.restored-sequence-box {
    font-family: 'Courier New', monospace !important;
    font-size: 14px !important;
    letter-spacing: 0.5px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
}
.restored-heading {
    color: #2E8B57 !important;
    font-weight: bold !important;
    font-size: 18px !important;
}
.example-text {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
}
.examples-table {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    max-width: none !important;
    overflow: visible !important;
}
.examples-table td {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    max-width: none !important;
    overflow: visible !important;
    text-overflow: unset !important;
}
.sequence-output label {
    font-weight: bold !important;
    color: #495057 !important;
    font-size: 14px !important;
    margin-bottom: 5px !important;
}
/* Force full display of examples */
.examples-container {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
}
.examples-container table {
    width: 100% !important;
    table-layout: auto !important;
}
.examples-container td {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    padding: 8px !important;
    vertical-align: top !important;
}
.examples-container th {
    white-space: nowrap !important;
    padding: 8px !important;
}
/* Override any Gradio default truncation */
.examples table td {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
}
.examples table {
    table-layout: auto !important;
    width: 100% !important;
}
/* Target the specific examples component */
div[data-testid="examples"] table td {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
}
/* Force examples to show full content */
.examples table, .examples table td, .examples table th {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    table-layout: auto !important;
    width: auto !important;
    min-width: 100% !important;
}
/* Override any inline styles */
.examples * {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
}
/* Style output labels to match input labels exactly */
.output-label {
    font-weight: 600 !important;
    color: var(--label-text-color) !important;
    font-size: 14px !important;
    margin-bottom: 8px !important;
    margin-top: 16px !important;
    line-height: 1.4 !important;
    display: block !important;
}
"""

# Rows offered by the examples widget, each as [heavy chain, light chain].
# An empty string means that chain is not supplied for that example.
EXAMPLE_SEQUENCES = [
    [
        "EVQ***SGGEVKKPGASVKVSCRASGYTFRNYGLTWVRQAPGQGLEWMGWISAYNGNTNYAQKFQGRVTLTTDTSTSTAYMELRSLRSDDTAVYFCAR**PGHGAAFMDVWGTGTTVTVSS",
        "DIQLTQSPLSLPVTLGQPASISCRSS*SLEASDTNIYLSWFQQRPGQSPRRLIYKI*NRDSGVPDRFSGSGSGTHFTLRISRVEADDVAVYYCMQGTHWPPAFGQGTKVDIK",
    ],
    [
        "EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMGWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDY**GMDVWGQGTTVTVSS",
        "",
    ],
    [
        "",
        "DIQLTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIY*ASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTP*TFGQGTKVEIK",
    ],
]

# Build the Gradio interface: inputs in the left column, restored output in
# the right column, then examples, the event wiring and a footer note.
with gr.Blocks(
    title="AbLang2 Sequence Restorer",
    theme=gr.themes.Soft(),
    css=CUSTOM_CSS,
) as demo:
    gr.Markdown("""
    # 🧬 AbLang2 Sequence Restorer

    This app uses the AbLang2 model to restore masked residues (*) in antibody sequences.
    You can provide either one or both heavy and light chain sequences.

    **Instructions:**
    - Use `*` to mask residues you want to restore
    - Provide heavy chain, light chain, or both
    - Enable "Use Alignment" for variable missing lengths
    """)

    with gr.Row():
        # Left column: user inputs and the trigger button.
        with gr.Column():
            heavy_input = gr.Textbox(
                label="Heavy Chain Sequence",
                placeholder="Enter heavy chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"],
            )

            light_input = gr.Textbox(
                label="Light Chain Sequence",
                placeholder="Enter light chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"],
            )

            align_checkbox = gr.Checkbox(
                label="Use Alignment (for variable missing lengths)",
                value=False,
            )

            restore_btn = gr.Button("🔄 Restore Sequences", variant="primary")

        # Right column: one labelled HTML panel per chain.
        with gr.Column():
            gr.Markdown("### 🧬 Restored Sequences", elem_classes=["restored-heading"])
            gr.Markdown("*Green highlighting shows restored residues*")

            gr.Markdown("**Heavy Chain Sequence**", elem_classes=["output-label"])
            heavy_output = gr.HTML(label="")

            gr.Markdown("**Light Chain Sequence**", elem_classes=["output-label"])
            light_output = gr.HTML(label="")

    # Clickable example rows that fill the two sequence textboxes.
    gr.Examples(
        examples=EXAMPLE_SEQUENCES,
        inputs=[heavy_input, light_input],
        label="Example Sequences",
    )

    # Run the restoration on click and send its results to the two HTML panels.
    restore_btn.click(
        fn=restore_sequences,
        inputs=[heavy_input, light_input, align_checkbox],
        outputs=[heavy_output, light_output],
    )

    gr.Markdown("""
    ---
    **Note:** This app uses the AbLang2 model from Hugging Face Hub.
    The restoration process may take a few seconds depending on sequence length and complexity.
    """)

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt CHANGED
@@ -3,4 +3,3 @@ transformers>=4.30.0
3
  torch>=2.0.0
4
  numpy>=1.21.0
5
  pandas>=1.3.0
6
- git+https://github.com/oxpig/ANARCI.git
 
3
  torch>=2.0.0
4
  numpy>=1.21.0
5
  pandas>=1.3.0
 
setup.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from setuptools import setup
2
+
3
# Package metadata for the AbLang2 restoration app.
setup(
    name="ablang2-restore",
    version="1.0.0",
    install_requires=[
        "gradio>=4.0.0",
        "transformers>=4.30.0",
        "torch>=2.0.0",
        "numpy>=1.21.0",
        "pandas>=1.3.0",
        # ANARCI is installed straight from GitHub. This PEP 508 direct
        # reference replaces the former `dependency_links` entry, which
        # current pip ignores.
        "anarci @ git+https://github.com/oxpig/ANARCI.git",
    ],
)