#!/usr/bin/env python3
"""
Test quantization schemes for .mound files and measure precision loss.

Usage:
    python mount_quantize.py input.mound [--write-quantized]

This tool:
1. Reads the original float32 .mound file
2. Tests different quantization schemes:
   - uint16 + uint16 + uint8 (5 bytes/vertex)
   - uint16 + uint16 + uint16 (6 bytes/vertex)
3. Measures quantization error (mean, median, max)
4. Reports file size savings
5. Optionally writes quantized versions for testing (--write-quantized)

File layout as read and written by this script (native byte order):
    bytes  0-3    magic b'LIDR'
    bytes  4-15   uint32 version, point_count, triangle_count
    bytes 16-39   float32 min_x, min_y, min_z, max_x, max_y, max_z
    bytes 40-63   zero padding (header is 64 bytes)
    then          point_count vertices, then triangle_count * 3 uint32 indices

Results:
    NOTE: the numbers below were recorded with the first version of this
    script, which truncated instead of rounding when quantizing. The
    quantizer now rounds to the nearest level, which bounds the error by
    half a quantization step, so the Z errors it reports are about half of
    those shown here. That run also labelled the error sections "(meters)"
    although the values are centimeters.

    $ python tooling/mount_quantize.py data/MOUND/BS19830748.mound
    Reading data/MOUND/BS19830748.mound...

    Original file:
      Points: 751,240
      Triangles: 268,718
      Vertex data: 8.60 MB
      Index data: 3.08 MB
      Total size: 11.67 MB
      Format: float32 (12 bytes/vertex)

    ======================================================================
    Testing quantization schemes...
    ======================================================================

    ======================================================================
    uint16 + uint16 + uint8
    ======================================================================
    Bytes per vertex: 5

    Vertex data: 3.58 MB (vs 8.60 MB original)
      Vertex savings: 58.3%
    Index data: 3.08 MB (unchanged)
    Total file size: 6.66 MB (vs 11.67 MB original)
      Total savings: 43.0%

    Terrain spans: X=498.00m, Y=499.50m, Z=17.81m

    X-axis errors (meters):
      Mean: 0.0000 cm
      Median: 0.0000 cm
      Max: 0.0000 cm

    Y-axis errors (meters):
      Mean: 0.0000 cm
      Median: 0.0000 cm
      Max: 0.0000 cm

    Z-axis errors (meters):
      Mean: 3.4896 cm
      Median: 3.4973 cm
      Max: 6.9824 cm

    3D Euclidean errors (meters):
      Mean: 3.4896 cm
      Median: 3.4973 cm
      Max: 6.9824 cm

    ======================================================================
    uint16 + uint16 + uint16
    ======================================================================
    Bytes per vertex: 6

    Vertex data: 4.30 MB (vs 8.60 MB original)
      Vertex savings: 50.0%
    Index data: 3.08 MB (unchanged)
    Total file size: 7.37 MB (vs 11.67 MB original)
      Total savings: 36.8%

    Terrain spans: X=498.00m, Y=499.50m, Z=17.81m

    X-axis errors (meters):
      Mean: 0.0000 cm
      Median: 0.0000 cm
      Max: 0.0000 cm

    Y-axis errors (meters):
      Mean: 0.0000 cm
      Median: 0.0000 cm
      Max: 0.0000 cm

    Z-axis errors (meters):
      Mean: 0.0135 cm
      Median: 0.0122 cm
      Max: 0.0275 cm

    3D Euclidean errors (meters):
      Mean: 0.0135 cm
      Median: 0.0122 cm
      Max: 0.0275 cm

    Done!

Conclusion:
totally feasible, but probably not worth it

I tried it anyway! Turns out compression beats me handily:
Float32 vs Quantized Compression Results: Testing revealed that
float32 vertices compress significantly better than quantized
uint16+uint16+uint8 formats. Float32 achieves ~65-70% compression
with brotli (12 MB → 3.9 MB), while quantized achieves only
~35-40% (6.7 MB → 4.2 MB), making the compressed float32 files
actually smaller. This is because IEEE 754 floats have inherent
structure—spatially correlated terrain points share similar
exponents, creating repetitive byte patterns that LZ algorithms
exploit effectively. Quantization, while reducing raw file size,
spreads values across the full uint16/uint8 range and destroys
this natural clustering, increasing entropy and reducing
compressibility. The lesson: modern compression algorithms are
incredibly sophisticated at exploiting numerical data structure.
For web delivery, ship float32 with brotli/gzip—simpler format,
better compression, zero precision loss.

"""

import sys
import struct
import numpy as np
from pathlib import Path


# Total header size on disk; the parsed fields occupy the first 40 bytes and
# the remainder is zero padding.
HEADER_SIZE = 64
MAGIC = b'LIDR'

# '=' keeps the native byte order of the original per-field struct calls while
# disabling alignment padding, so the ten fields are exactly 40 bytes.
_HEADER_STRUCT = struct.Struct('=4s3I6f')

# format_type -> (header version, dtype of the quantized Z channel)
_QUANTIZED_FORMATS = {
    'u16_u16_u8': (2, np.uint8),
    'u16_u16_u16': (3, np.uint16),
}

_MIB = 1024 * 1024
_RULE = '=' * 70

# (printed label, key in the dict returned by compute_errors)
_ERROR_SECTIONS = (
    ('X-axis', 'x'),
    ('Y-axis', 'y'),
    ('Z-axis', 'z'),
    ('3D Euclidean', 'euclidean'),
)


def read_mound_header(filepath):
    """Read and parse the fixed-size header of a .mound file.

    Args:
        filepath: Path of the .mound file.

    Returns:
        A dict with keys 'version', 'point_count', 'triangle_count' and
        'bounds' (itself a dict with min_x/max_x/min_y/max_y/min_z/max_z).

    Raises:
        ValueError: If the magic number is not b'LIDR' or the file is too
            short to contain the header fields.
    """
    with open(filepath, 'rb') as f:
        raw = f.read(_HEADER_STRUCT.size)

    magic = raw[:4]
    if magic != MAGIC:
        raise ValueError(f"Invalid magic number: {magic}")
    if len(raw) < _HEADER_STRUCT.size:
        raise ValueError(
            f"Truncated header: expected {_HEADER_STRUCT.size} bytes, got {len(raw)}"
        )

    (_, version, point_count, triangle_count,
     min_x, min_y, min_z, max_x, max_y, max_z) = _HEADER_STRUCT.unpack(raw)

    return {
        'version': version,
        'point_count': point_count,
        'triangle_count': triangle_count,
        'bounds': {
            'min_x': min_x, 'max_x': max_x,
            'min_y': min_y, 'max_y': max_y,
            'min_z': min_z, 'max_z': max_z,
        }
    }


def read_mound(filepath):
    """Read a complete float32 .mound file.

    Args:
        filepath: Path of the .mound file.

    Returns:
        A tuple ``(header, vertices, indices)`` where ``header`` is the dict
        from read_mound_header, ``vertices`` is a float32 array of shape
        (point_count, 3) and ``indices`` is a uint32 array of shape
        (triangle_count, 3). Both arrays are read-only views of the file data.

    Raises:
        ValueError: If the header is invalid or the vertex/index sections are
            shorter than the counts in the header require.
    """
    header = read_mound_header(filepath)
    vertex_bytes = header['point_count'] * 3 * 4  # 3 float32 per vertex
    index_bytes = header['triangle_count'] * 3 * 4  # 3 uint32 per triangle

    with open(filepath, 'rb') as f:
        f.seek(HEADER_SIZE)  # Skip header (including its padding)
        vertex_data = f.read(vertex_bytes)
        index_data = f.read(index_bytes)

    # A short read would otherwise yield fewer points than the header claims
    # (or an obscure reshape error), so fail loudly here instead.
    if len(vertex_data) != vertex_bytes:
        raise ValueError(
            f"Truncated vertex data: expected {vertex_bytes} bytes, got {len(vertex_data)}"
        )
    if len(index_data) != index_bytes:
        raise ValueError(
            f"Truncated index data: expected {index_bytes} bytes, got {len(index_data)}"
        )

    vertices = np.frombuffer(vertex_data, dtype=np.float32).reshape(-1, 3)
    indices = np.frombuffer(index_data, dtype=np.uint32).reshape(-1, 3)

    return header, vertices, indices


def _quantize_axis(values, lo, hi, levels, dtype):
    """Quantize one coordinate axis to integers in [0, levels] and back.

    Returns ``(quantized, dequantized)``; ``dequantized`` is float32.
    """
    span = hi - lo
    if not span > 0:
        # Flat (or inverted) axis: there is nothing to encode, and dividing by
        # the span would produce NaN/inf. Every value decodes to ``lo``.
        count = len(values)
        return np.zeros(count, dtype=dtype), np.full(count, lo, dtype=np.float32)

    # Work in float64 so the measured error is the quantization error itself
    # and not float32 arithmetic noise.
    normalized = (np.asarray(values, dtype=np.float64) - lo) / span
    # Round to the nearest level. Truncating (a bare astype) would bias every
    # value downwards and double the worst-case error to a full step.
    quantized = np.clip(np.rint(normalized * levels), 0, levels).astype(dtype)
    dequantized = (quantized.astype(np.float64) / levels * span + lo).astype(np.float32)
    return quantized, dequantized


def _quantize(vertices, bounds, z_levels, z_dtype):
    """Quantize X and Y to uint16 and Z to ``z_dtype`` with ``z_levels`` steps."""
    vertices = np.asarray(vertices)
    x_quant, x_dequant = _quantize_axis(
        vertices[:, 0], bounds['min_x'], bounds['max_x'], 65535, np.uint16)
    y_quant, y_dequant = _quantize_axis(
        vertices[:, 1], bounds['min_y'], bounds['max_y'], 65535, np.uint16)
    z_quant, z_dequant = _quantize_axis(
        vertices[:, 2], bounds['min_z'], bounds['max_z'], z_levels, z_dtype)

    reconstructed = np.column_stack([x_dequant, y_dequant, z_dequant])
    return reconstructed, (x_quant, y_quant, z_quant)


def quantize_u16_u16_u8(vertices, bounds):
    """Quantize to uint16 + uint16 + uint8 (5 bytes/vertex).

    Args:
        vertices: Array of shape (N, 3) holding X, Y, Z per row.
        bounds: Dict with min_x/max_x/min_y/max_y/min_z/max_z.

    Returns:
        ``(reconstructed, (x_quant, y_quant, z_quant))`` where
        ``reconstructed`` is the float32 (N, 3) array obtained by dequantizing
        and the quantized channels are uint16, uint16 and uint8 arrays.
        Values outside the bounds are clamped to the nearest bound.
    """
    return _quantize(vertices, bounds, 255, np.uint8)


def quantize_u16_u16_u16(vertices, bounds):
    """Quantize to uint16 + uint16 + uint16 (6 bytes/vertex).

    Args:
        vertices: Array of shape (N, 3) holding X, Y, Z per row.
        bounds: Dict with min_x/max_x/min_y/max_y/min_z/max_z.

    Returns:
        ``(reconstructed, (x_quant, y_quant, z_quant))`` where
        ``reconstructed`` is the float32 (N, 3) array obtained by dequantizing
        and all three quantized channels are uint16 arrays.
        Values outside the bounds are clamped to the nearest bound.
    """
    return _quantize(vertices, bounds, 65535, np.uint16)


def _summarize(values):
    """Return mean/median/max of a 1-D array as plain floats (zeros if empty)."""
    if values.size == 0:
        return {'mean': 0.0, 'median': 0.0, 'max': 0.0}
    return {
        'mean': float(values.mean()),
        'median': float(np.median(values)),
        'max': float(values.max()),
    }


def compute_errors(original, reconstructed):
    """Compute per-axis and total errors.

    Args:
        original: Array of shape (N, 3) with the reference coordinates.
        reconstructed: Array of shape (N, 3) with the dequantized coordinates.

    Returns:
        A dict with keys 'x', 'y', 'z' and 'euclidean'; each value is a dict
        with the 'mean', 'median' and 'max' absolute error, in the same unit
        as the inputs. All statistics are 0.0 when N is 0.
    """
    diff = np.abs(
        np.asarray(original, dtype=np.float64)
        - np.asarray(reconstructed, dtype=np.float64)
    )
    columns = {
        'x': diff[:, 0],
        'y': diff[:, 1],
        'z': diff[:, 2],
        'euclidean': np.linalg.norm(diff, axis=1),  # computed once, reused
    }
    return {key: _summarize(column) for key, column in columns.items()}


def write_quantized_mound(filepath, header, quantized_data, indices, format_type):
    """Write quantized .mound file with modified format.

    The header keeps the 64-byte layout of the float32 format; only the
    version field changes (2 = uint16+uint16+uint8, 3 = uint16+uint16+uint16).
    Vertices are written interleaved (x, y, z per vertex) without padding,
    followed by the triangle indices as uint32.

    Args:
        filepath: Output path.
        header: Dict as returned by read_mound_header.
        quantized_data: Tuple ``(x_quant, y_quant, z_quant)`` of integer arrays.
        indices: Triangle index array.
        format_type: 'u16_u16_u8' or 'u16_u16_u16'.

    Raises:
        ValueError: If ``format_type`` is unknown, if a channel holds fewer
            than ``header['point_count']`` values, or if a value does not fit
            the target integer type. Raised before the output file is opened.
    """
    try:
        version, z_dtype = _QUANTIZED_FORMATS[format_type]
    except KeyError:
        known = ', '.join(sorted(_QUANTIZED_FORMATS))
        raise ValueError(
            f"Unknown format_type {format_type!r}; expected one of: {known}"
        ) from None

    point_count = header['point_count']
    triangle_count = header['triangle_count']
    bounds = header['bounds']

    # Unaligned structured dtype => 5 or 6 bytes per record, no padding.
    record = np.dtype([('x', np.uint16), ('y', np.uint16), ('z', z_dtype)])
    packed = np.empty(point_count, dtype=record)
    for field, channel in zip(('x', 'y', 'z'), quantized_data):
        channel = np.asarray(channel)
        if len(channel) < point_count:
            raise ValueError(
                f"{field} channel has {len(channel)} values, header expects {point_count}"
            )
        channel = channel[:point_count]
        limit = np.iinfo(record[field]).max
        # Array assignment would silently wrap out-of-range values.
        if channel.size and (channel.min() < 0 or channel.max() > limit):
            raise ValueError(
                f"{field} channel has values outside 0..{limit} for format {format_type}"
            )
        packed[field] = channel

    header_bytes = _HEADER_STRUCT.pack(
        MAGIC, version, point_count, triangle_count,
        bounds['min_x'], bounds['min_y'], bounds['min_z'],
        bounds['max_x'], bounds['max_y'], bounds['max_z'],
    ).ljust(HEADER_SIZE, b'\x00')

    with open(filepath, 'wb') as f:
        f.write(header_bytes)
        # One bulk write instead of three tiny writes per vertex.
        f.write(packed.tobytes())
        # Write indices (unchanged)
        f.write(np.ascontiguousarray(indices, dtype=np.uint32).tobytes())


def _savings_pct(new_size, original_size):
    """Return the percentage saved relative to ``original_size`` (0 if that is 0)."""
    if not original_size:
        return 0.0
    return (1 - new_size / original_size) * 100


def print_stats(name, errors, bounds, bytes_per_vertex, original_vertex_size_mb,
                original_total_size_mb, point_count, triangle_count):
    """Print statistics for a quantization scheme.

    Args:
        name: Heading for the scheme.
        errors: Dict as returned by compute_errors (values in meters).
        bounds: Dict with min_x/max_x/min_y/max_y/min_z/max_z.
        bytes_per_vertex: Size of one quantized vertex in bytes.
        original_vertex_size_mb: Vertex data size of the input file, in MB.
        original_total_size_mb: Total size of the input file, in MB.
        point_count: Number of vertices.
        triangle_count: Number of triangles.
    """
    span_x = bounds['max_x'] - bounds['min_x']
    span_y = bounds['max_y'] - bounds['min_y']
    span_z = bounds['max_z'] - bounds['min_z']

    # Calculate file sizes
    vertex_size = point_count * bytes_per_vertex
    index_size = triangle_count * 3 * 4  # 3 uint32 per triangle
    total_size = HEADER_SIZE + vertex_size + index_size

    vertex_size_mb = vertex_size / _MIB
    index_size_mb = index_size / _MIB
    total_size_mb = total_size / _MIB

    vertex_savings_pct = _savings_pct(vertex_size_mb, original_vertex_size_mb)
    total_savings_pct = _savings_pct(total_size_mb, original_total_size_mb)

    print(f"\n{_RULE}")
    print(f"{name}")
    print(_RULE)
    print(f"Bytes per vertex: {bytes_per_vertex}")
    print(f"\nVertex data: {vertex_size_mb:.2f} MB (vs {original_vertex_size_mb:.2f} MB original)")
    print(f"  Vertex savings: {vertex_savings_pct:.1f}%")
    print(f"Index data: {index_size_mb:.2f} MB (unchanged)")
    print(f"Total file size: {total_size_mb:.2f} MB (vs {original_total_size_mb:.2f} MB original)")
    print(f"  Total savings: {total_savings_pct:.1f}%")
    print(f"\nTerrain spans: X={span_x:.2f}m, Y={span_y:.2f}m, Z={span_z:.2f}m")

    for label, key in _ERROR_SECTIONS:
        stats = errors[key]
        # Errors arrive in meters and are shown in centimeters (hence * 100).
        print(f"\n{label} errors (centimeters):")
        print(f"  Mean:   {stats['mean'] * 100:.4f} cm")
        print(f"  Median: {stats['median'] * 100:.4f} cm")
        print(f"  Max:    {stats['max'] * 100:.4f} cm")


def main():
    """Command-line entry point: analyse one .mound file, optionally write outputs."""
    script = Path(sys.argv[0]).name
    # Flags may appear anywhere; the first non-flag argument is the input file.
    positional = [arg for arg in sys.argv[1:] if not arg.startswith('--')]
    if not positional:
        print(f"Usage: python {script} input.mound [--write-quantized]", file=sys.stderr)
        sys.exit(1)

    input_file = positional[0]
    write_files = '--write-quantized' in sys.argv

    if not Path(input_file).exists():
        print(f"Error: Input file '{input_file}' not found", file=sys.stderr)
        sys.exit(1)

    print(f"Reading {input_file}...")
    header, vertices, indices = read_mound(input_file)
    point_count = header['point_count']
    triangle_count = header['triangle_count']

    original_total_size = Path(input_file).stat().st_size / _MIB
    original_vertex_size = (point_count * 12) / _MIB  # 12 bytes per vertex (3 float32)
    original_index_size = (triangle_count * 12) / _MIB  # 12 bytes per triangle (3 uint32)

    print("\nOriginal file:")
    print(f"  Points: {point_count:,}")
    print(f"  Triangles: {triangle_count:,}")
    print(f"  Vertex data: {original_vertex_size:.2f} MB")
    print(f"  Index data: {original_index_size:.2f} MB")
    print(f"  Total size: {original_total_size:.2f} MB")
    print("  Format: float32 (12 bytes/vertex)")

    print("\n" + _RULE)
    print("Testing quantization schemes...")
    print(_RULE)

    # Test uint16 + uint16 + uint8
    reconstructed_5, quant_5 = quantize_u16_u16_u8(vertices, header['bounds'])
    errors_5 = compute_errors(vertices, reconstructed_5)
    print_stats("uint16 + uint16 + uint8", errors_5, header['bounds'], 5,
                original_vertex_size, original_total_size,
                point_count, triangle_count)

    # Test uint16 + uint16 + uint16
    reconstructed_6, quant_6 = quantize_u16_u16_u16(vertices, header['bounds'])
    errors_6 = compute_errors(vertices, reconstructed_6)
    print_stats("uint16 + uint16 + uint16", errors_6, header['bounds'], 6,
                original_vertex_size, original_total_size,
                point_count, triangle_count)

    # Write quantized files if requested
    if write_files:
        base_path = Path(input_file)
        output_5 = base_path.with_suffix('.u16u16u8.mound')
        output_6 = base_path.with_suffix('.u16u16u16.mound')

        print("\nWriting quantized files...")
        write_quantized_mound(output_5, header, quant_5, indices, 'u16_u16_u8')
        print(f"  {output_5}")

        write_quantized_mound(output_6, header, quant_6, indices, 'u16_u16_u16')
        print(f"  {output_6}")

        # Verify file sizes
        size_5 = output_5.stat().st_size / _MIB
        size_6 = output_6.stat().st_size / _MIB
        print("\nActual file sizes:")
        print(f"  uint16+uint16+uint8: {size_5:.2f} MB")
        print(f"  uint16+uint16+uint16: {size_6:.2f} MB")

    print("\nDone!")


if __name__ == '__main__':
    main()