oneocr / _archive /temp /decode_manifest.py
OneOCR Dev
OneOCR - reverse engineering complete, ONNX pipeline 53% match rate
ce847d4
"""
Decode manifest.bin protobuf from onemodel — extract detector config parameters.
This is a raw protobuf that we decode manually (wire format).
"""
import struct
def decode_varint(data, pos):
    """Read one base-128 varint from ``data`` starting at offset ``pos``.

    Every byte contributes its low seven bits, least-significant group
    first; a byte whose high bit is clear ends the varint.

    Args:
        data: Indexable byte sequence (indexing must yield ints).
        pos: Offset of the first byte of the varint.

    Returns:
        A ``(value, next_pos)`` tuple. If ``pos`` is already at or beyond
        the end of ``data`` nothing is consumed and ``(0, pos)`` is
        returned. If the data runs out while the continuation bit is still
        set, the bits gathered so far are returned together with
        ``len(data)`` as the next position.
    """
    value = 0
    cursor = pos
    end = len(data)
    while cursor < end:
        byte = data[cursor]
        # The n-th consumed byte (0-based) supplies bits 7n .. 7n+6.
        value |= (byte & 0x7F) << (7 * (cursor - pos))
        cursor += 1
        if (byte & 0x80) == 0:
            break
    return value, cursor
def _read_complete_varint(data, pos):
    """Return ``(value, next_pos)`` for a whole varint at ``pos``, else ``None``.

    ``decode_varint`` is lenient: it hands back whatever bits it gathered even
    when the data ends mid-varint. Here that situation (no byte consumed, or
    the last consumed byte still has its continuation bit set) is reported as
    ``None`` so a truncated varint is never mistaken for a real value.
    """
    value, next_pos = decode_varint(data, pos)
    if next_pos == pos or data[next_pos - 1] & 0x80:
        return None
    return value, next_pos


def _printable_text(payload):
    """Return ``payload`` as text if it is printable ASCII, else ``None``.

    Only characters in the range 32..126 plus newline, carriage return and
    tab are accepted.
    """
    try:
        text = payload.decode('utf-8')
    except (UnicodeDecodeError, AttributeError):
        # UnicodeDecodeError: not valid UTF-8.  AttributeError: a buffer type
        # without .decode() (e.g. memoryview) -- treat it as raw bytes.
        return None
    if all(32 <= ord(c) < 127 or c in '\n\r\t' for c in text):
        return text
    return None


def _print_length_delimited(payload, field_num, depth, prefix):
    """Print one length-delimited field as a string, sub-message or raw bytes."""
    indent = " " * depth

    text = _printable_text(payload)
    if text is not None:
        if len(text) < 300:
            print(f"{indent}Field {field_num} (string): \"{text}\"")
        else:
            print(f"{indent}Field {field_num} (string, {len(text)} chars): \"{text[:100]}...\"")
        return

    if len(payload) <= 2:
        # Too short to guess at a nested message; show every byte.
        print(f"{indent}Field {field_num} (bytes, {len(payload)}): {payload.hex()}")
        return

    # Heuristic: treat the payload as a nested message when its first byte
    # looks like a single-byte tag with a wire type no greater than 5.
    first_tag = payload[0]
    if 0 < first_tag < 128 and (first_tag & 0x7) <= 5:
        print(f"{indent}Field {field_num} (message, {len(payload)} bytes):")
        try:
            decode_protobuf(payload, depth + 1, prefix + f".{field_num}")
        except RecursionError:
            # Nesting too deep to follow; fall back to a hex preview.
            print(f"{indent} [raw hex: {payload[:50].hex()}...]")
    else:
        print(f"{indent}Field {field_num} (bytes, {len(payload)}): {payload[:50].hex()}")


def decode_protobuf(data, depth=0, prefix=""):
    """Print a best-effort, schema-less dump of protobuf wire-format data.

    Walks ``data`` field by field and prints one line per field. Varint,
    64-bit and 32-bit fields are printed directly (64-bit as a little-endian
    double, 32-bit as both a little-endian float and an unsigned int).
    Length-delimited fields are shown as a string when they are printable
    ASCII, recursively decoded when they look like a nested message, and
    otherwise shown as hex.

    Decoding stops silently at the first field that is truncated (including
    a varint whose final byte is missing), and stops after printing an
    ``UNKNOWN`` line when an unsupported wire type is met.

    Args:
        data: Bytes-like object holding the encoded message.
        depth: Nesting level; each level indents the output by ``" "``.
        prefix: Dotted path of enclosing field numbers. It is only threaded
            through the recursive calls and never printed.

    Returns:
        None. All output goes to stdout via ``print``.
    """
    pos = 0
    size = len(data)
    indent = " " * depth

    while pos < size:
        tag = _read_complete_varint(data, pos)
        if tag is None:
            break
        tag_wire, pos = tag
        field_num = tag_wire >> 3
        wire_type = tag_wire & 0x7

        if wire_type == 0:  # Varint
            field = _read_complete_varint(data, pos)
            if field is None:
                break
            val, pos = field
            print(f"{indent}Field {field_num} (varint): {val}")
        elif wire_type == 1:  # 64-bit
            if pos + 8 > size:
                break
            val_d = struct.unpack_from('<d', data, pos)[0]
            print(f"{indent}Field {field_num} (64-bit/double): {val_d}")
            pos += 8
        elif wire_type == 2:  # Length-delimited
            header = _read_complete_varint(data, pos)
            if header is None:
                break
            length, pos = header
            if pos + length > size:
                break
            payload = data[pos:pos + length]
            pos += length
            _print_length_delimited(payload, field_num, depth, prefix)
        elif wire_type == 5:  # 32-bit
            if pos + 4 > size:
                break
            val_f = struct.unpack_from('<f', data, pos)[0]
            val_i = struct.unpack_from('<I', data, pos)[0]
            print(f"{indent}Field {field_num} (32-bit/float): {val_f} (int: {val_i})")
            pos += 4
        else:
            print(f"{indent}Field {field_num} (wire_type={wire_type}): UNKNOWN")
            break
# Read the extracted manifest in binary mode, then dump its decoded structure.
manifest_path = 'oneocr_extracted/config_data/manifest.bin'
with open(manifest_path, 'rb') as manifest_file:
    data = manifest_file.read()

rule = "=" * 60
print(f"Manifest size: {len(data)} bytes")
print(rule, "DECODED PROTOBUF", rule, sep="\n")
decode_protobuf(data)