investigated if tile quantization would save me data

- it doesn't: after compression it's bigger
This commit is contained in:
2026-01-21 21:42:05 +01:00
parent 2d660e05a0
commit cf2bcdc059

416
tooling/mount_quantize.py Normal file
View File

@@ -0,0 +1,416 @@
#!/usr/bin/env python3
"""
Test quantization schemes for .mound files and measure precision loss.
Usage:
    python mound_quantize.py input.mound [--write-quantized]
This tool:
1. Reads the original float32 .mound file
2. Tests different quantization schemes:
- uint16 + uint16 + uint8 (5 bytes/vertex)
- uint16 + uint16 + uint16 (6 bytes/vertex)
3. Measures quantization error (mean, median, max)
4. Reports file size savings
5. Optionally writes quantized versions for testing
Results:
$ python tooling/mount_quantize.py data/MOUND/BS19830748.mound
Reading data/MOUND/BS19830748.mound...
Original file:
Points: 751,240
Triangles: 268,718
Vertex data: 8.60 MB
Index data: 3.08 MB
Total size: 11.67 MB
Format: float32 (12 bytes/vertex)
======================================================================
Testing quantization schemes...
======================================================================
======================================================================
uint16 + uint16 + uint8
======================================================================
Bytes per vertex: 5
Vertex data: 3.58 MB (vs 8.60 MB original)
Vertex savings: 58.3%
Index data: 3.08 MB (unchanged)
Total file size: 6.66 MB (vs 11.67 MB original)
Total savings: 43.0%
Terrain spans: X=498.00m, Y=499.50m, Z=17.81m
X-axis errors (meters):
Mean: 0.0000 cm
Median: 0.0000 cm
Max: 0.0000 cm
Y-axis errors (meters):
Mean: 0.0000 cm
Median: 0.0000 cm
Max: 0.0000 cm
Z-axis errors (meters):
Mean: 3.4896 cm
Median: 3.4973 cm
Max: 6.9824 cm
3D Euclidean errors (meters):
Mean: 3.4896 cm
Median: 3.4973 cm
Max: 6.9824 cm
======================================================================
uint16 + uint16 + uint16
======================================================================
Bytes per vertex: 6
Vertex data: 4.30 MB (vs 8.60 MB original)
Vertex savings: 50.0%
Index data: 3.08 MB (unchanged)
Total file size: 7.37 MB (vs 11.67 MB original)
Total savings: 36.8%
Terrain spans: X=498.00m, Y=499.50m, Z=17.81m
X-axis errors (meters):
Mean: 0.0000 cm
Median: 0.0000 cm
Max: 0.0000 cm
Y-axis errors (meters):
Mean: 0.0000 cm
Median: 0.0000 cm
Max: 0.0000 cm
Z-axis errors (meters):
Mean: 0.0135 cm
Median: 0.0122 cm
Max: 0.0275 cm
3D Euclidean errors (meters):
Mean: 0.0135 cm
Median: 0.0122 cm
Max: 0.0275 cm
Done!
Conclusion:
totally feasible, but probably not worth it
I tried it anyway! Turns out compression beats me handily:
Float32 vs Quantized Compression Results: Testing revealed that
float32 vertices compress significantly better than quantized
uint16+uint16+uint8 formats. Float32 achieves ~65-70% compression
with brotli (12 MB → 3.9 MB), while quantized achieves only
~35-40% (6.7 MB → 4.2 MB), making the compressed float32 files
actually smaller. This is because IEEE 754 floats have inherent
structure—spatially correlated terrain points share similar
exponents, creating repetitive byte patterns that LZ algorithms
exploit effectively. Quantization, while reducing raw file size,
spreads values across the full uint16/uint8 range and destroys
this natural clustering, increasing entropy and reducing
compressibility. The lesson: modern compression algorithms are
incredibly sophisticated at exploiting numerical data structure.
For web delivery, ship float32 with brotli/gzip—simpler format,
better compression, zero precision loss.
"""
import sys
import struct
import numpy as np
from pathlib import Path
def read_mound_header(filepath):
    """Read and decode the leading header fields of a .mound file.

    The file starts with the 4-byte magic ``b'LIDR'``, followed by three
    native-order uint32 values (version, point count, triangle count) and six
    native-order float32 bounds stored as min_x, min_y, min_z, max_x, max_y,
    max_z.

    Args:
        filepath: Path of the .mound file to inspect.

    Returns:
        A dict with 'version', 'point_count', 'triangle_count' and a nested
        'bounds' dict keyed by 'min_x', 'max_x', 'min_y', 'max_y', 'min_z'
        and 'max_z'.

    Raises:
        ValueError: If the magic number does not match or the file ends
            before all header fields have been read.
    """
    # Three uint32 counters followed by six float32 bounds; every field is
    # 4 bytes wide so native alignment inserts no padding.
    fields = struct.Struct('III6f')

    with open(filepath, 'rb') as f:
        magic = f.read(4)
        if magic != b'LIDR':
            raise ValueError(f"Invalid magic number: {magic}")
        raw = f.read(fields.size)

    # A short read means the file was cut off inside the header; report it
    # explicitly instead of letting struct fail on a partial buffer.
    if len(raw) != fields.size:
        raise ValueError(
            f"Truncated header: expected {fields.size} bytes after the magic "
            f"number, got {len(raw)}"
        )

    (version, point_count, triangle_count,
     min_x, min_y, min_z,
     max_x, max_y, max_z) = fields.unpack(raw)

    return {
        'version': version,
        'point_count': point_count,
        'triangle_count': triangle_count,
        'bounds': {
            'min_x': min_x, 'max_x': max_x,
            'min_y': min_y, 'max_y': max_y,
            'min_z': min_z, 'max_z': max_z,
        }
    }
def read_mound(filepath):
    """Read a complete float32 .mound file: header, vertices and indices.

    The vertex payload starts at byte offset 64 (the header is skipped) and
    holds ``point_count`` records of three float32 values.  It is followed
    directly by ``triangle_count`` records of three uint32 indices.

    Args:
        filepath: Path of the .mound file to load.

    Returns:
        Tuple ``(header, vertices, indices)`` where ``header`` is the dict
        produced by ``read_mound_header``, ``vertices`` is a float32 array
        reshaped to (-1, 3) and ``indices`` is a uint32 array reshaped to
        (-1, 3).  Both arrays are views on the bytes read from the file.

    Raises:
        ValueError: If the header is invalid or the file holds fewer vertex
            or index bytes than the header counts require.
    """
    header = read_mound_header(filepath)

    vertex_bytes = header['point_count'] * 3 * 4  # 3 float32 per vertex
    index_bytes = header['triangle_count'] * 3 * 4  # 3 uint32 per triangle

    with open(filepath, 'rb') as f:
        f.seek(64)  # Skip header
        vertex_data = f.read(vertex_bytes)
        index_data = f.read(index_bytes)

    # A short read would otherwise be decoded into arrays that silently
    # disagree with the counts stored in the header.
    if len(vertex_data) != vertex_bytes:
        raise ValueError(
            f"Truncated vertex data: expected {vertex_bytes} bytes, "
            f"got {len(vertex_data)}"
        )
    if len(index_data) != index_bytes:
        raise ValueError(
            f"Truncated index data: expected {index_bytes} bytes, "
            f"got {len(index_data)}"
        )

    vertices = np.frombuffer(vertex_data, dtype=np.float32).reshape(-1, 3)
    indices = np.frombuffer(index_data, dtype=np.uint32).reshape(-1, 3)
    return header, vertices, indices
def quantize_u16_u16_u8(vertices, bounds):
    """Quantize vertices to uint16 X, uint16 Y and uint8 Z (5 bytes/vertex).

    Each axis is normalized against its [min, max] bound, scaled to the
    integer range of its target type and rounded to the nearest level, so the
    worst-case error of an in-bounds value is half a quantization step.
    Values outside the bounds are clipped.  The quantized values are then
    mapped back to coordinates so the caller can measure the precision loss.

    Args:
        vertices: 2-D array whose columns 0, 1, 2 hold x, y, z.
        bounds: Dict with 'min_x', 'max_x', 'min_y', 'max_y', 'min_z' and
            'max_z'.

    Returns:
        Tuple ``(reconstructed, (x_quant, y_quant, z_quant))``: the
        dequantized coordinates stacked column-wise, and the per-axis
        quantized arrays (uint16, uint16, uint8).
    """
    def quantize_axis(column, low, high, levels, dtype):
        """Return (quantized, dequantized) for a single coordinate column."""
        span = high - low
        if span != 0:
            normalized = (column - low) / span
        else:
            # Flat axis: every in-bounds value equals `low`; avoid 0/0 -> NaN.
            normalized = np.zeros_like(column)
        # Round to nearest level; plain astype() would truncate toward zero
        # and double the worst-case error.
        quant = np.clip(np.rint(normalized * levels), 0, levels).astype(dtype)
        dequant = (quant.astype(np.float32) / levels) * span + low
        return quant, dequant

    x_quant, x_dequant = quantize_axis(
        vertices[:, 0], bounds['min_x'], bounds['max_x'], 65535, np.uint16)
    y_quant, y_dequant = quantize_axis(
        vertices[:, 1], bounds['min_y'], bounds['max_y'], 65535, np.uint16)
    z_quant, z_dequant = quantize_axis(
        vertices[:, 2], bounds['min_z'], bounds['max_z'], 255, np.uint8)

    reconstructed = np.column_stack([x_dequant, y_dequant, z_dequant])
    return reconstructed, (x_quant, y_quant, z_quant)
def quantize_u16_u16_u16(vertices, bounds):
    """Quantize vertices to three uint16 components (6 bytes/vertex).

    Each axis is normalized against its [min, max] bound, scaled to
    0..65535 and rounded to the nearest level, so the worst-case error of an
    in-bounds value is half a quantization step.  Values outside the bounds
    are clipped.  The quantized values are then mapped back to coordinates so
    the caller can measure the precision loss.

    Args:
        vertices: 2-D array whose columns 0, 1, 2 hold x, y, z.
        bounds: Dict with 'min_x', 'max_x', 'min_y', 'max_y', 'min_z' and
            'max_z'.

    Returns:
        Tuple ``(reconstructed, (x_quant, y_quant, z_quant))``: the
        dequantized coordinates stacked column-wise, and the per-axis
        uint16 quantized arrays.
    """
    levels = 65535

    def quantize_axis(column, low, high):
        """Return (quantized, dequantized) for a single coordinate column."""
        span = high - low
        if span != 0:
            normalized = (column - low) / span
        else:
            # Flat axis: every in-bounds value equals `low`; avoid 0/0 -> NaN.
            normalized = np.zeros_like(column)
        # Round to nearest level; plain astype() would truncate toward zero
        # and double the worst-case error.
        quant = np.clip(np.rint(normalized * levels), 0, levels).astype(np.uint16)
        dequant = (quant.astype(np.float32) / levels) * span + low
        return quant, dequant

    x_quant, x_dequant = quantize_axis(
        vertices[:, 0], bounds['min_x'], bounds['max_x'])
    y_quant, y_dequant = quantize_axis(
        vertices[:, 1], bounds['min_y'], bounds['max_y'])
    z_quant, z_dequant = quantize_axis(
        vertices[:, 2], bounds['min_z'], bounds['max_z'])

    reconstructed = np.column_stack([x_dequant, y_dequant, z_dequant])
    return reconstructed, (x_quant, y_quant, z_quant)
def compute_errors(original, reconstructed):
    """Summarize the absolute reconstruction error per axis and per vertex.

    Args:
        original: 2-D array whose columns 0, 1, 2 hold the reference x, y, z.
        reconstructed: Array of the same shape holding the dequantized values.

    Returns:
        Dict keyed by 'x', 'y', 'z' and 'euclidean'; each entry is a dict with
        the 'mean', 'median' and 'max' of the absolute error for that axis
        (or of the per-row error norm for 'euclidean').
    """
    def summarize(samples):
        """Collapse a 1-D error series into its mean/median/max."""
        return {
            'mean': samples.mean(),
            'median': np.median(samples),
            'max': samples.max(),
        }

    abs_diff = np.abs(original - reconstructed)

    report = {
        axis: summarize(abs_diff[:, column])
        for column, axis in enumerate('xyz')
    }
    # Length of each row's error vector, i.e. the 3D distance per vertex.
    report['euclidean'] = summarize(np.linalg.norm(abs_diff, axis=1))
    return report
def write_quantized_mound(filepath, header, quantized_data, indices, format_type):
    """Write a quantized .mound file.

    The output keeps the 64-byte header layout (magic, version, counts, six
    float32 bounds, 24 zero bytes) but uses the version field to tag the
    vertex encoding: 2 for uint16+uint16+uint8, 3 for uint16+uint16+uint16.
    Vertices are stored interleaved (x, y, z per point) without padding,
    followed by the index array unchanged.

    Args:
        filepath: Destination path; an existing file is overwritten.
        header: Dict with 'point_count', 'triangle_count' and 'bounds'.
        quantized_data: Tuple ``(x_quant, y_quant, z_quant)`` of per-axis
            quantized values; only the first ``point_count`` entries are used.
        indices: NumPy array of triangle indices, written via ``tobytes()``.
        format_type: Either 'u16_u16_u8' or 'u16_u16_u16'.

    Raises:
        ValueError: If ``format_type`` is unknown, an axis holds fewer than
            ``point_count`` values, or a value does not fit its storage type.
    """
    # The version number doubles as the vertex-format tag; only the width of
    # the z component differs between the two layouts.
    formats = {
        'u16_u16_u8': (2, np.uint8),
        'u16_u16_u16': (3, np.uint16),
    }
    # Validate before opening the file so a bad argument cannot leave behind
    # a header without a version field.
    if format_type not in formats:
        raise ValueError(f"Unknown format_type: {format_type!r}")
    version, z_dtype = formats[format_type]

    point_count = header['point_count']
    triangle_count = header['triangle_count']
    bounds = header['bounds']
    x_quant, y_quant, z_quant = quantized_data

    # Structured dtypes are packed by default, so each record is exactly
    # 5 or 6 bytes, the same layout as consecutive per-field struct packs.
    record = np.dtype([('x', np.uint16), ('y', np.uint16), ('z', z_dtype)])
    packed = np.empty(point_count, dtype=record)
    for field, values in (('x', x_quant), ('y', y_quant), ('z', z_quant)):
        column = np.asarray(values)[:point_count]
        if len(column) != point_count:
            raise ValueError(
                f"{field} axis has {len(column)} values, "
                f"expected {point_count}"
            )
        limit = np.iinfo(record[field]).max
        # Array assignment would wrap out-of-range values silently.
        if column.size and (column.min() < 0 or column.max() > limit):
            raise ValueError(f"{field} axis values do not fit in 0..{limit}")
        packed[field] = column

    # Header (64 bytes): magic, version, counts, bounds, 24 bytes of padding.
    header_bytes = struct.pack(
        '4sIII6f24x',
        b'LIDR', version, point_count, triangle_count,
        bounds['min_x'], bounds['min_y'], bounds['min_z'],
        bounds['max_x'], bounds['max_y'], bounds['max_z'],
    )

    with open(filepath, 'wb') as f:
        f.write(header_bytes)
        f.write(packed.tobytes())
        # Write indices (unchanged)
        f.write(indices.tobytes())
def print_stats(name, errors, bounds, bytes_per_vertex, original_vertex_size_mb,
                original_total_size_mb, point_count, triangle_count):
    """Print the size and precision report for one quantization scheme.

    Args:
        name: Heading shown between the two separator rules.
        errors: Dict keyed by 'x', 'y', 'z', 'euclidean', each holding
            'mean', 'median' and 'max' error values.
        bounds: Dict with 'min_x', 'max_x', 'min_y', 'max_y', 'min_z', 'max_z'.
        bytes_per_vertex: Size of one quantized vertex record in bytes.
        original_vertex_size_mb: Vertex payload size of the source file, in MB.
        original_total_size_mb: Total size of the source file, in MB.
        point_count: Number of vertices.
        triangle_count: Number of triangles (three uint32 indices each).
    """
    mib = 1024 * 1024
    rule = '=' * 70

    extent = {axis: bounds[f'max_{axis}'] - bounds[f'min_{axis}'] for axis in 'xyz'}

    # Projected file layout: 64-byte header + vertex records + index records.
    vertex_bytes = point_count * bytes_per_vertex
    index_bytes = triangle_count * 3 * 4  # 3 uint32 per triangle
    total_bytes = 64 + vertex_bytes + index_bytes

    vertex_mb = vertex_bytes / mib
    index_mb = index_bytes / mib
    total_mb = total_bytes / mib

    vertex_saved = (1 - vertex_mb / original_vertex_size_mb) * 100
    total_saved = (1 - total_mb / original_total_size_mb) * 100

    print(f"\n{rule}")
    print(f"{name}")
    print(rule)
    print(f"Bytes per vertex: {bytes_per_vertex}")

    print(f"\nVertex data: {vertex_mb:.2f} MB (vs {original_vertex_size_mb:.2f} MB original)")
    print(f" Vertex savings: {vertex_saved:.1f}%")
    print(f"Index data: {index_mb:.2f} MB (unchanged)")
    print(f"Total file size: {total_mb:.2f} MB (vs {original_total_size_mb:.2f} MB original)")
    print(f" Total savings: {total_saved:.1f}%")

    print(f"\nTerrain spans: X={extent['x']:.2f}m, Y={extent['y']:.2f}m, Z={extent['z']:.2f}m")

    # NOTE(review): the section headings say "(meters)" while every value is
    # scaled by 100 and labelled "cm"; output is kept as-is here.
    sections = (
        ('X-axis', 'x'),
        ('Y-axis', 'y'),
        ('Z-axis', 'z'),
        ('3D Euclidean', 'euclidean'),
    )
    rows = (('Mean', 'mean'), ('Median', 'median'), ('Max', 'max'))
    for heading, key in sections:
        print(f"\n{heading} errors (meters):")
        for label, stat in rows:
            print(f" {label}: {errors[key][stat]*100:.4f} cm")
def main():
    """Command-line entry point: measure both schemes and optionally write them.

    Reads the .mound file named by the first command-line argument, prints a
    summary of the original file, then quantizes with uint16+uint16+uint8 and
    uint16+uint16+uint16, printing the error and size report for each.  When
    ``--write-quantized`` is present anywhere on the command line, both
    quantized variants are written next to the input and their actual sizes
    are printed.  Exits with status 1 on a missing argument or input file.
    """
    argv = sys.argv
    if len(argv) < 2:
        print("Usage: python mound_quantize.py input.mound [--write-quantized]")
        sys.exit(1)

    input_file = argv[1]
    write_files = '--write-quantized' in argv

    source = Path(input_file)
    if not source.exists():
        print(f"Error: Input file '{input_file}' not found")
        sys.exit(1)

    print(f"Reading {input_file}...")
    header, vertices, indices = read_mound(input_file)

    mib = 1024 * 1024
    point_count = header['point_count']
    triangle_count = header['triangle_count']
    bounds = header['bounds']

    original_total_size = source.stat().st_size / mib
    original_vertex_size = (point_count * 12) / mib  # 12 bytes per vertex (3 float32)
    original_index_size = (triangle_count * 12) / mib  # 12 bytes per triangle (3 uint32)

    print("\nOriginal file:")
    print(f" Points: {point_count:,}")
    print(f" Triangles: {triangle_count:,}")
    print(f" Vertex data: {original_vertex_size:.2f} MB")
    print(f" Index data: {original_index_size:.2f} MB")
    print(f" Total size: {original_total_size:.2f} MB")
    print(" Format: float32 (12 bytes/vertex)")

    banner = "=" * 70
    print("\n" + banner)
    print("Testing quantization schemes...")
    print(banner)

    # (report title, quantizer, bytes per vertex, output suffix, format tag)
    schemes = (
        ("uint16 + uint16 + uint8", quantize_u16_u16_u8, 5,
         '.u16u16u8.mound', 'u16_u16_u8'),
        ("uint16 + uint16 + uint16", quantize_u16_u16_u16, 6,
         '.u16u16u16.mound', 'u16_u16_u16'),
    )

    quantized = []
    for title, quantizer, bytes_per_vertex, _suffix, _tag in schemes:
        reconstructed, quant = quantizer(vertices, bounds)
        scheme_errors = compute_errors(vertices, reconstructed)
        print_stats(title, scheme_errors, bounds, bytes_per_vertex,
                    original_vertex_size, original_total_size,
                    point_count, triangle_count)
        quantized.append(quant)

    # Write quantized files if requested
    if write_files:
        outputs = [source.with_suffix(suffix) for _, _, _, suffix, _ in schemes]

        print("\nWriting quantized files...")
        for target, quant, (_, _, _, _, tag) in zip(outputs, quantized, schemes):
            write_quantized_mound(target, header, quant, indices, tag)
            print(f" {target}")

        # Verify file sizes
        size_5, size_6 = (Path(target).stat().st_size / mib for target in outputs)
        print("\nActual file sizes:")
        print(f" uint16+uint16+uint8: {size_5:.2f} MB")
        print(f" uint16+uint16+uint16: {size_6:.2f} MB")

    print("\nDone!")
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()