investigated if tile quantization would save me data

- it doesn't, after compression it's bigger
2026-01-21 21:42:05 +01:00
parent 2d660e05a0
commit cf2bcdc059
1 changed files with 416 additions and 0 deletions
@@ -0,0 +1,416 @@
+#!/usr/bin/env python3
+"""
+Test quantization schemes for .mound files and measure precision loss.
+
+Usage:
+    python mound_quantize.py input.mound [--write-quantized]
+
+This tool:
+1. Reads the original float32 .mound file
+2. Tests different quantization schemes:
+   - uint16 + uint16 + uint8 (5 bytes/vertex)
+   - uint16 + uint16 + uint16 (6 bytes/vertex)
+3. Measures quantization error (mean, median, max)
+4. Reports file size savings
+5. Optionally writes quantized versions for testing (pass --write-quantized)
+
+Results:
+ …/moundhunters   master ?  ♥ 21:12   
+❯ python tooling/mount_quantize.py data/MOUND/BS19830748.mound
+Reading data/MOUND/BS19830748.mound...
+
+Original file:
+  Points: 751,240
+  Triangles: 268,718
+  Vertex data: 8.60 MB
+  Index data: 3.08 MB
+  Total size: 11.67 MB
+  Format: float32 (12 bytes/vertex)
+
+======================================================================
+Testing quantization schemes...
+======================================================================
+
+======================================================================
+uint16 + uint16 + uint8
+======================================================================
+Bytes per vertex: 5
+
+Vertex data: 3.58 MB (vs 8.60 MB original)
+  Vertex savings: 58.3%
+Index data: 3.08 MB (unchanged)
+Total file size: 6.66 MB (vs 11.67 MB original)
+  Total savings: 43.0%
+
+Terrain spans: X=498.00m, Y=499.50m, Z=17.81m
+
+X-axis errors (meters):
+  Mean:   0.0000 cm
+  Median: 0.0000 cm
+  Max:    0.0000 cm
+
+Y-axis errors (meters):
+  Mean:   0.0000 cm
+  Median: 0.0000 cm
+  Max:    0.0000 cm
+
+Z-axis errors (meters):
+  Mean:   3.4896 cm
+  Median: 3.4973 cm
+  Max:    6.9824 cm
+
+3D Euclidean errors (meters):
+  Mean:   3.4896 cm
+  Median: 3.4973 cm
+  Max:    6.9824 cm
+
+======================================================================
+uint16 + uint16 + uint16
+======================================================================
+Bytes per vertex: 6
+
+Vertex data: 4.30 MB (vs 8.60 MB original)
+  Vertex savings: 50.0%
+Index data: 3.08 MB (unchanged)
+Total file size: 7.37 MB (vs 11.67 MB original)
+  Total savings: 36.8%
+
+Terrain spans: X=498.00m, Y=499.50m, Z=17.81m
+
+X-axis errors (meters):
+  Mean:   0.0000 cm
+  Median: 0.0000 cm
+  Max:    0.0000 cm
+
+Y-axis errors (meters):
+  Mean:   0.0000 cm
+  Median: 0.0000 cm
+  Max:    0.0000 cm
+
+Z-axis errors (meters):
+  Mean:   0.0135 cm
+  Median: 0.0122 cm
+  Max:    0.0275 cm
+
+3D Euclidean errors (meters):
+  Mean:   0.0135 cm
+  Median: 0.0122 cm
+  Max:    0.0275 cm
+
+Done!
+
+Conclusion:
+totally feasible, but probably not worth it
+
+I tried it anyway! Turns out compression beats me handily:
+Float32 vs Quantized Compression Results: Testing revealed that 
+float32 vertices compress significantly better than quantized 
+uint16+uint16+uint8 formats. Float32 achieves ~65-70% compression 
+with brotli (12 MB → 3.9 MB), while quantized achieves only 
+~35-40% (6.7 MB → 4.2 MB), making the compressed float32 files 
+actually smaller. This is because IEEE 754 floats have inherent 
+structure—spatially correlated terrain points share similar 
+exponents, creating repetitive byte patterns that LZ algorithms 
+exploit effectively. Quantization, while reducing raw file size, 
+spreads values across the full uint16/uint8 range and destroys 
+this natural clustering, increasing entropy and reducing 
+compressibility. The lesson: modern compression algorithms are 
+incredibly sophisticated at exploiting numerical data structure. 
+For web delivery, ship float32 with brotli/gzip—simpler format, 
+better compression, zero precision loss.
+
+"""
+
+import sys
+import struct
+import numpy as np
+from pathlib import Path
+
+
+def read_mound_header(filepath):
+    """Parse the fixed-layout header at the start of a .mound file.
+
+    Fields are read one at a time in native byte order: a 4-byte magic
+    (b'LIDR'), three unsigned 32-bit integers (version, point count,
+    triangle count) and six float32 bounds stored as min x/y/z followed
+    by max x/y/z.
+
+    Returns:
+        dict with 'version', 'point_count', 'triangle_count' and a nested
+        'bounds' dict keyed 'min_x', 'max_x', 'min_y', 'max_y', 'min_z',
+        'max_z'.
+
+    Raises:
+        ValueError: if the file does not start with b'LIDR'.
+    """
+    def next_field(stream, fmt):
+        # Every header field after the magic is exactly 4 bytes wide.
+        return struct.unpack(fmt, stream.read(4))[0]
+
+    with open(filepath, 'rb') as stream:
+        signature = stream.read(4)
+        if signature != b'LIDR':
+            raise ValueError(f"Invalid magic number: {signature}")
+
+        version, point_count, triangle_count = [
+            next_field(stream, 'I') for _ in range(3)
+        ]
+
+        # On-disk order is all minima first, then all maxima.
+        disk_order = ('min_x', 'min_y', 'min_z', 'max_x', 'max_y', 'max_z')
+        raw_bounds = {key: next_field(stream, 'f') for key in disk_order}
+
+    # The returned dict groups min/max per axis rather than in disk order.
+    grouped = ('min_x', 'max_x', 'min_y', 'max_y', 'min_z', 'max_z')
+    return {
+        'version': version,
+        'point_count': point_count,
+        'triangle_count': triangle_count,
+        'bounds': {key: raw_bounds[key] for key in grouped},
+    }
+
+
+def read_mound(filepath):
+    """Read a complete float32 .mound file.
+
+    The payload follows the 64-byte header: ``point_count`` vertices of
+    three float32 values each, then ``triangle_count`` triangles of three
+    uint32 indices each.
+
+    Returns:
+        (header, vertices, indices) where ``header`` is the dict produced by
+        ``read_mound_header``, ``vertices`` has shape (point_count, 3) and
+        dtype float32, and ``indices`` has shape (triangle_count, 3) and
+        dtype uint32. Both arrays are read-only views over the bytes read.
+
+    Raises:
+        ValueError: if the magic number is wrong, or if the file holds fewer
+            vertex/index bytes than the header declares.
+    """
+    header = read_mound_header(filepath)
+
+    vertex_bytes = header['point_count'] * 3 * 4  # 3 float32 per vertex
+    index_bytes = header['triangle_count'] * 3 * 4  # 3 uint32 per triangle
+
+    with open(filepath, 'rb') as f:
+        f.seek(64)  # Skip header
+        vertex_data = f.read(vertex_bytes)
+        index_data = f.read(index_bytes)
+
+    # A short read means the file is truncated. Fail loudly here instead of
+    # letting reshape() fail cryptically or silently returning fewer rows
+    # than the header promises.
+    if len(vertex_data) != vertex_bytes:
+        raise ValueError(
+            f"Truncated vertex data: expected {vertex_bytes} bytes, "
+            f"got {len(vertex_data)}"
+        )
+    if len(index_data) != index_bytes:
+        raise ValueError(
+            f"Truncated index data: expected {index_bytes} bytes, "
+            f"got {len(index_data)}"
+        )
+
+    vertices = np.frombuffer(vertex_data, dtype=np.float32).reshape(-1, 3)
+    indices = np.frombuffer(index_data, dtype=np.uint32).reshape(-1, 3)
+
+    return header, vertices, indices
+
+
+def _quantize_axis(values, lo, hi, levels, dtype):
+    """Quantize one coordinate axis to integer codes in [0, levels].
+
+    Returns (restored, codes): ``codes`` has the requested integer dtype and
+    ``restored`` is the float32 round-trip of those codes back into [lo, hi].
+    """
+    span = hi - lo
+    if span > 0:
+        normalized = (values - lo) / span
+    else:
+        # Flat axis (min == max): dividing would give NaN/inf codes. Every
+        # value maps to code 0, which dequantizes back to ``lo``.
+        normalized = np.zeros(len(values), dtype=np.float32)
+
+    # Round to the nearest level. Plain astype() truncates toward zero, which
+    # doubles the worst-case error from half a step to a full step.
+    codes = np.clip(np.rint(normalized * levels), 0, levels).astype(dtype)
+    restored = (codes.astype(np.float32) / levels) * span + lo
+    return restored, codes
+
+
+def quantize_u16_u16_u8(vertices, bounds):
+    """Quantize to uint16 + uint16 + uint8 (5 bytes/vertex).
+
+    Args:
+        vertices: array indexed as ``vertices[:, 0..2]`` for X, Y, Z.
+        bounds: dict with 'min_x', 'max_x', 'min_y', 'max_y', 'min_z',
+            'max_z' giving the range each axis is normalized against.
+
+    Returns:
+        (reconstructed, (x_quant, y_quant, z_quant)): the dequantized
+        vertices as an (N, 3) array, and the integer codes per axis
+        (uint16, uint16, uint8).
+    """
+    x_dequant, x_quant = _quantize_axis(
+        vertices[:, 0], bounds['min_x'], bounds['max_x'], 65535, np.uint16)
+    y_dequant, y_quant = _quantize_axis(
+        vertices[:, 1], bounds['min_y'], bounds['max_y'], 65535, np.uint16)
+    z_dequant, z_quant = _quantize_axis(
+        vertices[:, 2], bounds['min_z'], bounds['max_z'], 255, np.uint8)
+
+    reconstructed = np.column_stack([x_dequant, y_dequant, z_dequant])
+
+    return reconstructed, (x_quant, y_quant, z_quant)
+
+
+def quantize_u16_u16_u16(vertices, bounds):
+    """Quantize to uint16 + uint16 + uint16 (6 bytes/vertex).
+
+    Args:
+        vertices: array indexed as ``vertices[:, 0..2]`` for X, Y, Z.
+        bounds: dict with 'min_x', 'max_x', 'min_y', 'max_y', 'min_z',
+            'max_z' giving the range each axis is normalized against.
+
+    Returns:
+        (reconstructed, (x_quant, y_quant, z_quant)): the dequantized
+        vertices as an (N, 3) array, and the uint16 codes per axis.
+    """
+    x_dequant, x_quant = _quantize_axis(
+        vertices[:, 0], bounds['min_x'], bounds['max_x'], 65535, np.uint16)
+    y_dequant, y_quant = _quantize_axis(
+        vertices[:, 1], bounds['min_y'], bounds['max_y'], 65535, np.uint16)
+    z_dequant, z_quant = _quantize_axis(
+        vertices[:, 2], bounds['min_z'], bounds['max_z'], 65535, np.uint16)
+
+    reconstructed = np.column_stack([x_dequant, y_dequant, z_dequant])
+
+    return reconstructed, (x_quant, y_quant, z_quant)
+
+
+def compute_errors(original, reconstructed):
+    """Summarize absolute reconstruction error per axis and in 3D.
+
+    Returns a dict keyed 'x', 'y', 'z', 'euclidean'; each value is a dict
+    with the 'mean', 'median' and 'max' of the absolute differences for that
+    column (or of the per-row Euclidean norm of the differences).
+    """
+    abs_delta = np.abs(original - reconstructed)
+
+    def summarize(series):
+        return {
+            'mean': series.mean(),
+            'median': np.median(series),
+            'max': series.max(),
+        }
+
+    report = {
+        axis: summarize(abs_delta[:, column])
+        for column, axis in enumerate(('x', 'y', 'z'))
+    }
+    # Per-vertex distance between the original and reconstructed points.
+    report['euclidean'] = summarize(np.linalg.norm(abs_delta, axis=1))
+
+    return report
+
+
+def write_quantized_mound(filepath, header, quantized_data, indices, format_type):
+    """Write a quantized .mound file.
+
+    The 64-byte header keeps the float32 layout (magic, version, counts,
+    min x/y/z, max x/y/z, 24 zero bytes); the version field identifies the
+    vertex encoding: 2 for uint16+uint16+uint8, 3 for uint16+uint16+uint16.
+    Vertices follow as tightly packed (x, y, z) records, then the index
+    array exactly as given.
+
+    Args:
+        filepath: destination path.
+        header: dict with 'point_count', 'triangle_count' and 'bounds'.
+        quantized_data: (x_quant, y_quant, z_quant) integer code sequences;
+            the first ``point_count`` entries of each are written.
+        indices: array whose ``tobytes()`` is appended unchanged.
+        format_type: 'u16_u16_u8' or 'u16_u16_u16'.
+
+    Raises:
+        ValueError: for an unknown ``format_type``, a component with fewer
+            than ``point_count`` entries, or codes that do not fit the
+            target integer type. Nothing is written in those cases.
+    """
+    # (version tag, dtype of the Z component) for each supported layout.
+    layouts = {
+        'u16_u16_u8': (2, np.uint8),
+        'u16_u16_u16': (3, np.uint16),
+    }
+    if format_type not in layouts:
+        # Previously an unknown format silently skipped the version field and
+        # produced a corrupt 60-byte header.
+        raise ValueError(f"Unknown format_type: {format_type!r}")
+    version, z_dtype = layouts[format_type]
+
+    point_count = header['point_count']
+    triangle_count = header['triangle_count']
+    bounds = header['bounds']
+
+    x_quant, y_quant, z_quant = quantized_data
+
+    # Unaligned record dtype: each vertex takes exactly 5 or 6 bytes, matching
+    # what back-to-back struct.pack('H')/('B') calls would emit.
+    record = np.dtype([('x', np.uint16), ('y', np.uint16), ('z', z_dtype)])
+    packed = np.empty(point_count, dtype=record)
+    for field, values in zip(('x', 'y', 'z'), (x_quant, y_quant, z_quant)):
+        column = np.asarray(values)[:point_count]
+        if column.shape != (point_count,):
+            raise ValueError(
+                f"{field} component has {column.shape[0] if column.ndim else 0} "
+                f"entries, expected {point_count}"
+            )
+        limit = np.iinfo(record[field]).max
+        # Field assignment casts silently, so reject out-of-range codes here.
+        if column.size and (column.min() < 0 or column.max() > limit):
+            raise ValueError(f"{field} codes do not fit in {record[field]}")
+        packed[field] = column
+
+    # Header (64 bytes) - same structure, but version indicates quantization.
+    header_bytes = struct.pack(
+        '4sIII6f24x',
+        b'LIDR', version, point_count, triangle_count,
+        bounds['min_x'], bounds['min_y'], bounds['min_z'],
+        bounds['max_x'], bounds['max_y'], bounds['max_z'],
+    )
+
+    with open(filepath, 'wb') as f:
+        f.write(header_bytes)
+        f.write(packed.tobytes())
+        # Write indices (unchanged)
+        f.write(indices.tobytes())
+
+
+def print_stats(name, errors, bounds, bytes_per_vertex, original_vertex_size_mb, 
+                original_total_size_mb, point_count, triangle_count):
+    """Print size and precision statistics for one quantization scheme.
+
+    Args:
+        name: heading printed for the scheme.
+        errors: dict as returned by ``compute_errors`` (keys 'x', 'y', 'z',
+            'euclidean', each with 'mean', 'median', 'max').
+        bounds: dict with min/max per axis; used only to report spans.
+        bytes_per_vertex: size of one quantized vertex record.
+        original_vertex_size_mb: float32 vertex payload size in MiB.
+        original_total_size_mb: original file size in MiB.
+        point_count: number of vertices.
+        triangle_count: number of triangles (3 uint32 indices each).
+
+    Error values are multiplied by 100 before printing, so with coordinates
+    in meters the figures are centimeters, and the headings say so.
+    """
+    span_x = bounds['max_x'] - bounds['min_x']
+    span_y = bounds['max_y'] - bounds['min_y']
+    span_z = bounds['max_z'] - bounds['min_z']
+
+    # Calculate file sizes
+    mib = 1024 * 1024
+    header_size = 64
+    vertex_size = point_count * bytes_per_vertex
+    index_size = triangle_count * 3 * 4  # 3 uint32 per triangle
+    vertex_size_mb = vertex_size / mib
+    index_size_mb = index_size / mib
+    total_size_mb = (header_size + vertex_size + index_size) / mib
+
+    def savings_pct(new_mb, old_mb):
+        # An empty original has nothing to save; avoid dividing by zero.
+        return (1 - new_mb / old_mb) * 100 if old_mb else 0.0
+
+    vertex_savings_pct = savings_pct(vertex_size_mb, original_vertex_size_mb)
+    total_savings_pct = savings_pct(total_size_mb, original_total_size_mb)
+
+    print(f"\n{'='*70}")
+    print(f"{name}")
+    print(f"{'='*70}")
+    print(f"Bytes per vertex: {bytes_per_vertex}")
+    print(f"\nVertex data: {vertex_size_mb:.2f} MB (vs {original_vertex_size_mb:.2f} MB original)")
+    print(f"  Vertex savings: {vertex_savings_pct:.1f}%")
+    print(f"Index data: {index_size_mb:.2f} MB (unchanged)")
+    print(f"Total file size: {total_size_mb:.2f} MB (vs {original_total_size_mb:.2f} MB original)")
+    print(f"  Total savings: {total_savings_pct:.1f}%")
+    print(f"\nTerrain spans: X={span_x:.2f}m, Y={span_y:.2f}m, Z={span_z:.2f}m")
+
+    sections = (
+        ("X-axis", 'x'),
+        ("Y-axis", 'y'),
+        ("Z-axis", 'z'),
+        ("3D Euclidean", 'euclidean'),
+    )
+    for label, key in sections:
+        stats = errors[key]
+        # The heading used to say "(meters)" although values are shown in cm.
+        print(f"\n{label} errors (centimeters):")
+        print(f"  Mean:   {stats['mean']*100:.4f} cm")
+        print(f"  Median: {stats['median']*100:.4f} cm")
+        print(f"  Max:    {stats['max']*100:.4f} cm")
+
+
+def main():
+    """Command-line entry point.
+
+    Reads the .mound file named by the first argument, reports its size,
+    runs both quantization schemes through ``compute_errors`` and
+    ``print_stats``, and, when ``--write-quantized`` appears anywhere on the
+    command line, writes one quantized file per scheme next to the input.
+    Exits with status 1 if no argument is given or the input is missing.
+    """
+    argv = sys.argv
+    if len(argv) < 2:
+        print("Usage: python mound_quantize.py input.mound [--write-quantized]")
+        sys.exit(1)
+
+    input_file = argv[1]
+    write_files = '--write-quantized' in argv
+    source = Path(input_file)
+
+    if not source.exists():
+        print(f"Error: Input file '{input_file}' not found")
+        sys.exit(1)
+
+    print(f"Reading {input_file}...")
+    header, vertices, indices = read_mound(input_file)
+    bounds = header['bounds']
+    point_count = header['point_count']
+    triangle_count = header['triangle_count']
+
+    mib = 1024 * 1024
+    original_total_size = source.stat().st_size / mib
+    original_vertex_size = (point_count * 12) / mib  # 3 float32 per vertex
+    original_index_size = (triangle_count * 12) / mib  # 3 uint32 per triangle
+
+    print("\nOriginal file:")
+    print(f"  Points: {point_count:,}")
+    print(f"  Triangles: {triangle_count:,}")
+    print(f"  Vertex data: {original_vertex_size:.2f} MB")
+    print(f"  Index data: {original_index_size:.2f} MB")
+    print(f"  Total size: {original_total_size:.2f} MB")
+    print("  Format: float32 (12 bytes/vertex)")
+
+    banner = "=" * 70
+    print("\n" + banner)
+    print("Testing quantization schemes...")
+    print(banner)
+
+    # (title, quantizer, bytes per vertex) in the order they are reported.
+    schemes = (
+        ("uint16 + uint16 + uint8", quantize_u16_u16_u8, 5),
+        ("uint16 + uint16 + uint16", quantize_u16_u16_u16, 6),
+    )
+    codes_per_scheme = []
+    for title, quantize, bytes_per_vertex in schemes:
+        reconstructed, codes = quantize(vertices, bounds)
+        scheme_errors = compute_errors(vertices, reconstructed)
+        print_stats(title, scheme_errors, bounds, bytes_per_vertex,
+                    original_vertex_size, original_total_size,
+                    point_count, triangle_count)
+        codes_per_scheme.append(codes)
+
+    # Write quantized files if requested
+    if write_files:
+        targets = (
+            (source.with_suffix('.u16u16u8.mound'), codes_per_scheme[0], 'u16_u16_u8'),
+            (source.with_suffix('.u16u16u16.mound'), codes_per_scheme[1], 'u16_u16_u16'),
+        )
+
+        print("\nWriting quantized files...")
+        for target, codes, format_type in targets:
+            write_quantized_mound(target, header, codes, indices, format_type)
+            print(f"  {target}")
+
+        # Verify file sizes
+        size_5, size_6 = [
+            Path(target).stat().st_size / mib for target, _, _ in targets
+        ]
+        print("\nActual file sizes:")
+        print(f"  uint16+uint16+uint8:  {size_5:.2f} MB")
+        print(f"  uint16+uint16+uint16: {size_6:.2f} MB")
+
+    print("\nDone!")
+
+
+if __name__ == '__main__':
+    main()