minor editing
Browse files
- inference.py +1 -1
- models/aggregator.py +0 -17
- models/transformer.py +0 -1
inference.py
CHANGED
|
@@ -18,7 +18,7 @@ def infer(model, input_file):
|
|
| 18 |
|
| 19 |
if __name__ == '__main__':
|
| 20 |
parser = argparse.ArgumentParser(description="Inference")
|
| 21 |
- parser.add_argument('--svs_dir', type=str, default='./
  [NOTE(review): removed line truncated by extraction — see the added line below for the replacement]
|
| 22 |
|
| 23 |
args = parser.parse_args()
|
| 24 |
|
|
|
|
| 18 |
|
| 19 |
if __name__ == '__main__':
|
| 20 |
parser = argparse.ArgumentParser(description="Inference")
|
| 21 |
+ parser.add_argument('--svs_dir', type=str, default='./samples', help="")
|
| 22 |
|
| 23 |
args = parser.parse_args()
|
| 24 |
|
models/aggregator.py
CHANGED
|
@@ -6,23 +6,6 @@ from functools import partial
|
|
| 6 |
|
| 7 |
from models.transformer import LayerNormFp32, LayerNorm, QuickGELU, Attention, VisionTransformer
|
| 8 |
|
| 9 |
-
- # @dataclass
- # class VisionCfg:
- #     layers: Union[Tuple[int, int, int, int], int] = 6
- #     width: int = 512
- #     head_width: int = 64
- #     mlp_ratio: float = 4.0
-
- #     ls_init_value: Optional[float] = None  # layer scale initial value
- #     patch_dropout: float = 0.  # what fraction of patches to dropout during training (0 would mean disabled and no patches dropped) - 0.5 to 0.75 recommended in the paper for optimal results
- #     no_ln_pre: bool = False  # disable pre transformer LayerNorm
- #     pool_type: str = 'none'
- #     final_ln_after_pool: bool = True  # apply final LayerNorm after pooling
- #     output_tokens: bool = False
- #     act_kwargs: Optional[dict] = None
- #     norm_kwargs: Optional[dict] = None
-
|
| 26 |
@dataclass
|
| 27 |
class CLIPVisionCfg:
|
| 28 |
layers: Union[Tuple[int, int, int, int], int] = 6
|
|
|
|
| 6 |
|
| 7 |
from models.transformer import LayerNormFp32, LayerNorm, QuickGELU, Attention, VisionTransformer
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
@dataclass
|
| 10 |
class CLIPVisionCfg:
|
| 11 |
layers: Union[Tuple[int, int, int, int], int] = 6
|
models/transformer.py
CHANGED
|
@@ -215,7 +215,6 @@ class ResidualAttentionBlock(nn.Module):
|
|
| 215 |
self.ln_1 = norm_layer(d_model)
|
| 216 |
|
| 217 |
if use_flex:
|
| 218 |
- print("Flex_Attention!")
|
| 219 |
self.attn = Flex_Attention(dim = d_model, num_heads=n_head, proj_drop=dropout, use_rel_bias=use_rel_bias)
|
| 220 |
else:
|
| 221 |
self.attn = Attention(dim = d_model, num_heads=n_head, batch_first=batch_first, proj_drop=dropout, attn_drop=dropout)
|
|
|
|
| 215 |
self.ln_1 = norm_layer(d_model)
|
| 216 |
|
| 217 |
if use_flex:
|
|
|
|
| 218 |
self.attn = Flex_Attention(dim = d_model, num_heads=n_head, proj_drop=dropout, use_rel_bias=use_rel_bias)
|
| 219 |
else:
|
| 220 |
self.attn = Attention(dim = d_model, num_heads=n_head, batch_first=batch_first, proj_drop=dropout, attn_drop=dropout)
|