## current model load api
### Cons
#### long and awful code
#### two functions
#### must deal with float32 / float16 / quantized-u8 formats
#### must deal with alignment and padded data sizes
```cpp
#if NCNN_STDIO
// Load convolution weights (and optional bias) from an open stdio stream.
// On-disk layout: [4-byte format flag][weight payload, 4-byte aligned],
// then, if bias_term, num_output raw floats.
// Returns 0 on success, -1 on a short read, -100 on allocation failure.
int Convolution::load_model(FILE* binfp)
{
    int nread;

    // The leading 4 bytes tag the weight storage format; the union exposes
    // both the whole 32-bit tag and its individual bytes.
    // NOTE(review): anonymous struct inside a union is a common compiler
    // extension, not standard C++.
    union
    {
        struct
        {
            unsigned char f0;
            unsigned char f1;
            unsigned char f2;
            unsigned char f3;
        };
        unsigned int tag;
    } flag_struct;

    nread = fread(&flag_struct, sizeof(flag_struct), 1, binfp);
    if (nread != 1)
    {
        fprintf(stderr, "Convolution read flag_struct failed %d\n", nread);
        return -1;
    }

    // Byte sum: non-zero selects the quantized path below (unless the
    // float16 magic tag matched first); all-zero bytes mean raw float32.
    unsigned int flag = flag_struct.f0 + flag_struct.f1 + flag_struct.f2 + flag_struct.f3;

    weight_data.create(weight_data_size);
    if (weight_data.empty())
        return -100;

    if (flag_struct.tag == 0x01306B47)
    {
        // half-precision weight data
        // Payload: weight_data_size uint16 values, padded to a 4-byte boundary.
        int align_weight_data_size = alignSize(weight_data_size * sizeof(unsigned short), 4);

        std::vector<unsigned short> float16_weights;
        // NOTE(review): align_weight_data_size is a BYTE count but resize()
        // takes an element count, so this allocates ~2x the bytes needed.
        // Safe for the fread below (which reads a byte count), just wasteful.
        float16_weights.resize(align_weight_data_size);
        nread = fread(float16_weights.data(), align_weight_data_size, 1, binfp);
        if (nread != 1)
        {
            fprintf(stderr, "Convolution read float16_weights failed %d\n", nread);
            return -1;
        }

        // Expand fp16 -> fp32; replaces the Mat created above.
        weight_data = Mat::from_float16(float16_weights.data(), weight_data_size);
        if (weight_data.empty())
            return -100;
    }
    else if (flag != 0)
    {
        // quantized weight data: a 256-entry float codebook, then one u8
        // index per weight, padded to a 4-byte boundary.
        float quantization_value[256];
        nread = fread(quantization_value, 256 * sizeof(float), 1, binfp);
        if (nread != 1)
        {
            fprintf(stderr, "Convolution read quantization_value failed %d\n", nread);
            return -1;
        }

        int align_weight_data_size = alignSize(weight_data_size * sizeof(unsigned char), 4);
        std::vector<unsigned char> index_array;
        index_array.resize(align_weight_data_size);
        nread = fread(index_array.data(), align_weight_data_size, 1, binfp);
        if (nread != 1)
        {
            fprintf(stderr, "Convolution read index_array failed %d\n", nread);
            return -1;
        }

        // Dequantize: look each index up in the codebook.
        float* weight_data_ptr = weight_data;
        for (int i = 0; i < weight_data_size; i++)
        {
            weight_data_ptr[i] = quantization_value[ index_array[i] ];
        }
    }
    else if (flag_struct.f0 == 0)
    {
        // raw weight data: weight_data_size floats, no padding.
        nread = fread(weight_data, weight_data_size * sizeof(float), 1, binfp);
        if (nread != 1)
        {
            fprintf(stderr, "Convolution read weight_data failed %d\n", nread);
            return -1;
        }
    }

    if (bias_term)
    {
        // Bias is stored as num_output raw floats regardless of weight format.
        bias_data.create(num_output);
        if (bias_data.empty())
            return -100;
        nread = fread(bias_data, num_output * sizeof(float), 1, binfp);
        if (nread != 1)
        {
            fprintf(stderr, "Convolution read bias_data failed %d\n", nread);
            return -1;
        }
    }

    return 0;
}
#endif // NCNN_STDIO
// In-memory variant: parse the same layout as the FILE* overload directly
// from a byte buffer, advancing `mem` past everything consumed.
// Returns 0 on success, -100 on allocation failure.
int Convolution::load_model(const unsigned char*& mem)
{
    // Same 4-byte format tag as the stdio overload.
    // NOTE(review): anonymous struct inside a union is a compiler extension.
    union
    {
        struct
        {
            unsigned char f0;
            unsigned char f1;
            unsigned char f2;
            unsigned char f3;
        };
        unsigned int tag;
    } flag_struct;

    memcpy(&flag_struct, mem, sizeof(flag_struct));
    mem += sizeof(flag_struct);

    // Byte sum: non-zero selects the quantized path (unless the float16
    // magic tag matched first); all-zero bytes mean raw float32.
    unsigned int flag = flag_struct.f0 + flag_struct.f1 + flag_struct.f2 + flag_struct.f3;

    if (flag_struct.tag == 0x01306B47)
    {
        // half-precision weight data, payload padded to a 4-byte boundary
        weight_data = Mat::from_float16((unsigned short*)mem, weight_data_size);
        mem += alignSize(weight_data_size * sizeof(unsigned short), 4);
        if (weight_data.empty())
            return -100;
    }
    else if (flag != 0)
    {
        // quantized weight data: 256-entry float codebook, then one u8
        // index per weight, padded to a 4-byte boundary
        const float* quantization_value = (const float*)mem;
        mem += 256 * sizeof(float);

        const unsigned char* index_array = (const unsigned char*)mem;
        mem += alignSize(weight_data_size * sizeof(unsigned char), 4);

        weight_data.create(weight_data_size);
        if (weight_data.empty())
            return -100;

        // Dequantize: look each index up in the codebook.
        float* weight_data_ptr = weight_data;
        for (int i = 0; i < weight_data_size; i++)
        {
            weight_data_ptr[i] = quantization_value[ index_array[i] ];
        }
    }
    else if (flag_struct.f0 == 0)
    {
        // raw weight data
        // NOTE(review): this Mat(w, data) constructor appears to wrap `mem`
        // in place rather than copy it -- caller must keep the buffer alive
        // for the layer's lifetime.  Confirm against Mat's definition.
        weight_data = Mat(weight_data_size, (float*)mem);
        mem += weight_data_size * sizeof(float);
    }

    if (bias_term)
    {
        // Bias: num_output raw floats (same wrap-vs-copy caveat as above).
        bias_data = Mat(num_output, (float*)mem);
        mem += num_output * sizeof(float);
    }

    return 0;
}
```
## proposed new model load api
### Pros
#### clean and simple api
#### element type detection
```cpp
// Load weights (and optional bias) through the ModelBin abstraction,
// which hides the float32 / float16 / quantized storage formats.
// Returns 0 on success, -100 if a blob fails to load.
int Convolution::load_model(const ModelBin& mb)
{
    // Type code 0: let ModelBin auto-detect the element type from the file.
    weight_data = mb.load(weight_data_size, 0);
    if (weight_data.empty())
        return -100;

    if (!bias_term)
        return 0;

    // Type code 1: a certain element type is specified for the bias blob.
    bias_data = mb.load(num_output, 1);
    return bias_data.empty() ? -100 : 0;
}
```
|