investigated if tile quantization would save me data
- it doesn't; after compression it's bigger
This commit is contained in:
416
tooling/mount_quantize.py
Normal file
416
tooling/mount_quantize.py
Normal file
@@ -0,0 +1,416 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test quantization schemes for .mound files and measure precision loss.
|
||||
|
||||
Usage:
|
||||
python tooling/mount_quantize.py input.mound [--write-quantized]
|
||||
|
||||
This tool:
|
||||
1. Reads the original float32 .mound file
|
||||
2. Tests different quantization schemes:
|
||||
- uint16 + uint16 + uint8 (5 bytes/vertex)
|
||||
- uint16 + uint16 + uint16 (6 bytes/vertex)
|
||||
3. Measures quantization error (mean, median, max)
|
||||
4. Reports file size savings
|
||||
5. Optionally writes quantized versions for testing
|
||||
|
||||
Results:
|
||||
…/moundhunters master ? ♥ 21:12
|
||||
❯ python tooling/mount_quantize.py data/MOUND/BS19830748.mound
|
||||
Reading data/MOUND/BS19830748.mound...
|
||||
|
||||
Original file:
|
||||
Points: 751,240
|
||||
Triangles: 268,718
|
||||
Vertex data: 8.60 MB
|
||||
Index data: 3.08 MB
|
||||
Total size: 11.67 MB
|
||||
Format: float32 (12 bytes/vertex)
|
||||
|
||||
======================================================================
|
||||
Testing quantization schemes...
|
||||
======================================================================
|
||||
|
||||
======================================================================
|
||||
uint16 + uint16 + uint8
|
||||
======================================================================
|
||||
Bytes per vertex: 5
|
||||
|
||||
Vertex data: 3.58 MB (vs 8.60 MB original)
|
||||
Vertex savings: 58.3%
|
||||
Index data: 3.08 MB (unchanged)
|
||||
Total file size: 6.66 MB (vs 11.67 MB original)
|
||||
Total savings: 43.0%
|
||||
|
||||
Terrain spans: X=498.00m, Y=499.50m, Z=17.81m
|
||||
|
||||
X-axis errors (meters):
|
||||
Mean: 0.0000 cm
|
||||
Median: 0.0000 cm
|
||||
Max: 0.0000 cm
|
||||
|
||||
Y-axis errors (meters):
|
||||
Mean: 0.0000 cm
|
||||
Median: 0.0000 cm
|
||||
Max: 0.0000 cm
|
||||
|
||||
Z-axis errors (meters):
|
||||
Mean: 3.4896 cm
|
||||
Median: 3.4973 cm
|
||||
Max: 6.9824 cm
|
||||
|
||||
3D Euclidean errors (meters):
|
||||
Mean: 3.4896 cm
|
||||
Median: 3.4973 cm
|
||||
Max: 6.9824 cm
|
||||
|
||||
======================================================================
|
||||
uint16 + uint16 + uint16
|
||||
======================================================================
|
||||
Bytes per vertex: 6
|
||||
|
||||
Vertex data: 4.30 MB (vs 8.60 MB original)
|
||||
Vertex savings: 50.0%
|
||||
Index data: 3.08 MB (unchanged)
|
||||
Total file size: 7.37 MB (vs 11.67 MB original)
|
||||
Total savings: 36.8%
|
||||
|
||||
Terrain spans: X=498.00m, Y=499.50m, Z=17.81m
|
||||
|
||||
X-axis errors (meters):
|
||||
Mean: 0.0000 cm
|
||||
Median: 0.0000 cm
|
||||
Max: 0.0000 cm
|
||||
|
||||
Y-axis errors (meters):
|
||||
Mean: 0.0000 cm
|
||||
Median: 0.0000 cm
|
||||
Max: 0.0000 cm
|
||||
|
||||
Z-axis errors (meters):
|
||||
Mean: 0.0135 cm
|
||||
Median: 0.0122 cm
|
||||
Max: 0.0275 cm
|
||||
|
||||
3D Euclidean errors (meters):
|
||||
Mean: 0.0135 cm
|
||||
Median: 0.0122 cm
|
||||
Max: 0.0275 cm
|
||||
|
||||
Done!
|
||||
|
||||
Conclusion:
|
||||
totally feasible, but probably not worth it
|
||||
|
||||
I tried it anyway! Turns out compression beats me handily:
|
||||
Float32 vs Quantized Compression Results: Testing revealed that
|
||||
float32 vertices compress significantly better than quantized
|
||||
uint16+uint16+uint8 formats. Float32 achieves ~65-70% compression
|
||||
with brotli (12 MB → 3.9 MB), while quantized achieves only
|
||||
~35-40% (6.7 MB → 4.2 MB), making the compressed float32 files
|
||||
actually smaller. This is because IEEE 754 floats have inherent
|
||||
structure—spatially correlated terrain points share similar
|
||||
exponents, creating repetitive byte patterns that LZ algorithms
|
||||
exploit effectively. Quantization, while reducing raw file size,
|
||||
spreads values across the full uint16/uint8 range and destroys
|
||||
this natural clustering, increasing entropy and reducing
|
||||
compressibility. The lesson: modern compression algorithms are
|
||||
incredibly sophisticated at exploiting numerical data structure.
|
||||
For web delivery, ship float32 with brotli/gzip—simpler format,
|
||||
better compression, zero precision loss.
|
||||
|
||||
"""
|
||||
|
||||
import sys
|
||||
import struct
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def read_mound_header(filepath):
    """Parse the leading header fields of a .mound file.

    The header starts with the 4-byte magic ``b'LIDR'`` followed by three
    unsigned 32-bit integers (version, point count, triangle count) and six
    32-bit floats (min x/y/z, then max x/y/z), decoded in native byte order.

    Args:
        filepath: Path of the .mound file to inspect.

    Returns:
        A dict with keys 'version', 'point_count', 'triangle_count' and
        'bounds'; 'bounds' is a dict keyed 'min_x', 'max_x', 'min_y',
        'max_y', 'min_z', 'max_z'.

    Raises:
        ValueError: If the file does not begin with ``b'LIDR'``.
        struct.error: If the file ends before all header fields are read.
    """
    with open(filepath, 'rb') as stream:
        signature = stream.read(4)
        if signature != b'LIDR':
            raise ValueError(f"Invalid magic number: {signature}")
        # 3 x uint32 + 6 x float32 = 36 bytes, stored back to back.
        fields = struct.unpack('3I6f', stream.read(36))

    version, point_count, triangle_count = fields[:3]
    lo_x, lo_y, lo_z, hi_x, hi_y, hi_z = fields[3:]

    bounds = {
        'min_x': lo_x, 'max_x': hi_x,
        'min_y': lo_y, 'max_y': hi_y,
        'min_z': lo_z, 'max_z': hi_z,
    }
    return {
        'version': version,
        'point_count': point_count,
        'triangle_count': triangle_count,
        'bounds': bounds,
    }
|
||||
|
||||
|
||||
def read_mound(filepath):
    """Read a complete float32 .mound file.

    The payload follows a 64-byte header: ``point_count`` vertices of three
    float32 values each, then ``triangle_count`` triangles of three uint32
    indices each.

    Args:
        filepath: Path of the .mound file to load.

    Returns:
        A ``(header, vertices, indices)`` tuple where ``header`` is the dict
        produced by ``read_mound_header``, ``vertices`` is a float32 array of
        shape (point_count, 3) and ``indices`` is a uint32 array of shape
        (triangle_count, 3). Both arrays are read-only views over the bytes
        that were read.

    Raises:
        ValueError: If the magic is wrong, or if the file holds fewer vertex
            or index bytes than the header declares.
    """
    header = read_mound_header(filepath)
    point_count = header['point_count']
    triangle_count = header['triangle_count']

    vertex_bytes = point_count * 3 * 4  # 3 float32 per vertex
    index_bytes = triangle_count * 3 * 4  # 3 uint32 per triangle

    with open(filepath, 'rb') as f:
        f.seek(64)  # Skip header
        vertex_data = f.read(vertex_bytes)
        index_data = f.read(index_bytes)

    # A short read means the file is truncated. Without these checks the
    # reshape below would either fail with an opaque message or silently
    # return fewer rows than the header promises.
    if len(vertex_data) != vertex_bytes:
        raise ValueError(
            f"Truncated vertex data: expected {vertex_bytes} bytes, "
            f"got {len(vertex_data)}"
        )
    if len(index_data) != index_bytes:
        raise ValueError(
            f"Truncated index data: expected {index_bytes} bytes, "
            f"got {len(index_data)}"
        )

    vertices = np.frombuffer(vertex_data, dtype=np.float32).reshape(point_count, 3)
    indices = np.frombuffer(index_data, dtype=np.uint32).reshape(triangle_count, 3)

    return header, vertices, indices
|
||||
|
||||
|
||||
def quantize_u16_u16_u8(vertices, bounds):
    """Quantize vertices to uint16 X, uint16 Y and uint8 Z (5 bytes/vertex).

    Each axis is mapped linearly from its [min, max] bound onto the integer
    code range, rounded to the nearest code, and then mapped back to
    coordinates so the round-trip error can be measured.

    Args:
        vertices: 2-D array whose columns 0, 1 and 2 are X, Y and Z.
        bounds: Dict with keys 'min_x', 'max_x', 'min_y', 'max_y', 'min_z'
            and 'max_z' giving the range used for normalization.

    Returns:
        ``(reconstructed, (x_quant, y_quant, z_quant))`` where
        ``reconstructed`` stacks the dequantized X, Y, Z columns and the
        inner tuple holds the integer codes (uint16, uint16, uint8).
    """
    # (axis name, highest code, storage dtype) for each vertex column.
    axis_specs = (
        ('x', 65535, np.uint16),
        ('y', 65535, np.uint16),
        ('z', 255, np.uint8),
    )

    codes_per_axis = []
    restored_per_axis = []
    for column, (axis, levels, dtype) in enumerate(axis_specs):
        lo = bounds[f'min_{axis}']
        span = bounds[f'max_{axis}'] - lo
        values = vertices[:, column]

        if span > 0:
            scaled = (values - lo) / span * levels
        else:
            # Flat axis: every value equals the bound, so code 0 is exact
            # and we avoid a division by zero that would yield NaN codes.
            scaled = np.zeros(values.shape, dtype=np.float32)

        # Round to the nearest code; plain astype() truncates toward zero,
        # which doubles the worst-case quantization error.
        codes = np.clip(np.rint(scaled), 0, levels).astype(dtype)

        codes_per_axis.append(codes)
        restored_per_axis.append((codes.astype(np.float32) / levels) * span + lo)

    reconstructed = np.column_stack(restored_per_axis)
    return reconstructed, tuple(codes_per_axis)
|
||||
|
||||
|
||||
def quantize_u16_u16_u16(vertices, bounds):
    """Quantize vertices to uint16 X, Y and Z (6 bytes/vertex).

    Each axis is mapped linearly from its [min, max] bound onto 0..65535,
    rounded to the nearest code, and then mapped back to coordinates so the
    round-trip error can be measured.

    Args:
        vertices: 2-D array whose columns 0, 1 and 2 are X, Y and Z.
        bounds: Dict with keys 'min_x', 'max_x', 'min_y', 'max_y', 'min_z'
            and 'max_z' giving the range used for normalization.

    Returns:
        ``(reconstructed, (x_quant, y_quant, z_quant))`` where
        ``reconstructed`` stacks the dequantized X, Y, Z columns and the
        inner tuple holds the three uint16 code arrays.
    """
    levels = 65535  # highest uint16 code

    codes_per_axis = []
    restored_per_axis = []
    for column, axis in enumerate(('x', 'y', 'z')):
        lo = bounds[f'min_{axis}']
        span = bounds[f'max_{axis}'] - lo
        values = vertices[:, column]

        if span > 0:
            scaled = (values - lo) / span * levels
        else:
            # Flat axis: every value equals the bound, so code 0 is exact
            # and we avoid a division by zero that would yield NaN codes.
            scaled = np.zeros(values.shape, dtype=np.float32)

        # Round to the nearest code; plain astype() truncates toward zero,
        # which doubles the worst-case quantization error.
        codes = np.clip(np.rint(scaled), 0, levels).astype(np.uint16)

        codes_per_axis.append(codes)
        restored_per_axis.append((codes.astype(np.float32) / levels) * span + lo)

    reconstructed = np.column_stack(restored_per_axis)
    return reconstructed, tuple(codes_per_axis)
|
||||
|
||||
|
||||
def compute_errors(original, reconstructed):
    """Summarize absolute reconstruction error per axis and in 3D.

    Args:
        original: 2-D array whose columns 0, 1 and 2 are X, Y and Z.
        reconstructed: Array of the same shape holding the round-tripped
            coordinates.

    Returns:
        Dict keyed 'x', 'y', 'z' and 'euclidean'. Each value is a dict with
        'mean', 'median' and 'max' of the absolute error for that axis
        (or of the per-vertex Euclidean distance for 'euclidean').
    """
    diff = np.abs(original - reconstructed)
    # Per-vertex 3D error, computed once and reused for all three statistics.
    distance = np.linalg.norm(diff, axis=1)

    def summarize(values):
        return {
            'mean': values.mean(),
            'median': np.median(values),
            'max': values.max(),
        }

    errors = {
        axis: summarize(diff[:, column])
        for column, axis in enumerate(('x', 'y', 'z'))
    }
    errors['euclidean'] = summarize(distance)
    return errors
|
||||
|
||||
|
||||
def write_quantized_mound(filepath, header, quantized_data, indices, format_type):
    """Write a quantized .mound file.

    The file keeps the 64-byte header layout of the float32 format (magic,
    version, counts, bounds, 24 zero bytes) but uses the version field to
    flag the vertex encoding: 2 for uint16+uint16+uint8, 3 for
    uint16+uint16+uint16. Vertices are stored interleaved (x, y, z per
    vertex) with no padding, followed by the index array unchanged.

    Args:
        filepath: Destination path.
        header: Dict with 'point_count', 'triangle_count' and 'bounds'
            (keys 'min_x' ... 'max_z').
        quantized_data: ``(x_quant, y_quant, z_quant)`` integer code arrays.
        indices: NumPy array of triangle indices; written via ``tobytes()``.
        format_type: ``'u16_u16_u8'`` or ``'u16_u16_u16'``.

    Raises:
        ValueError: If ``format_type`` is not recognized, or a code array
            holds fewer than ``point_count`` entries. Both are checked before
            the file is opened, so no partial file is left behind.
    """
    # format name -> (version tag, storage dtype of the Z component)
    layouts = {
        'u16_u16_u8': (2, np.uint8),
        'u16_u16_u16': (3, np.uint16),
    }
    if format_type not in layouts:
        raise ValueError(
            f"Unknown format_type {format_type!r}; expected one of {sorted(layouts)}"
        )
    version, z_dtype = layouts[format_type]

    point_count = header['point_count']
    triangle_count = header['triangle_count']
    bounds = header['bounds']

    x_quant, y_quant, z_quant = (np.asarray(column) for column in quantized_data)
    for axis, column in zip('xyz', (x_quant, y_quant, z_quant)):
        if len(column) < point_count:
            raise ValueError(
                f"{axis} codes hold {len(column)} entries, "
                f"but the header declares {point_count} points"
            )

    # Packed (unaligned) record so tobytes() yields x, y, z per vertex with
    # no padding -- the same bytes a per-vertex struct.pack loop would emit,
    # without a Python-level iteration per point.
    packed = np.empty(
        point_count,
        dtype=[('x', np.uint16), ('y', np.uint16), ('z', z_dtype)],
    )
    packed['x'] = x_quant[:point_count]
    packed['y'] = y_quant[:point_count]
    packed['z'] = z_quant[:point_count]

    # Header (64 bytes) - same structure, but version indicates quantization
    header_bytes = struct.pack(
        '4s3I6f',
        b'LIDR', version, point_count, triangle_count,
        bounds['min_x'], bounds['min_y'], bounds['min_z'],
        bounds['max_x'], bounds['max_y'], bounds['max_z'],
    ) + b'\x00' * 24

    with open(filepath, 'wb') as f:
        f.write(header_bytes)
        f.write(packed.tobytes())
        # Write indices (unchanged)
        f.write(indices.tobytes())
|
||||
|
||||
|
||||
def print_stats(name, errors, bounds, bytes_per_vertex, original_vertex_size_mb,
                original_total_size_mb, point_count, triangle_count):
    """Print size and precision statistics for one quantization scheme.

    Args:
        name: Heading printed for the scheme.
        errors: Dict keyed 'x', 'y', 'z' and 'euclidean', each holding
            'mean', 'median' and 'max' error values. Values are multiplied
            by 100 and labelled cm, i.e. they are taken to be in meters.
        bounds: Dict with 'min_x' ... 'max_z'; used for the terrain span.
        bytes_per_vertex: Size of one quantized vertex in bytes.
        original_vertex_size_mb: Vertex payload of the source file, in MB.
        original_total_size_mb: Total size of the source file, in MB.
        point_count: Number of vertices.
        triangle_count: Number of triangles (3 uint32 indices each).
    """
    bytes_per_mb = 1024 * 1024
    span_x = bounds['max_x'] - bounds['min_x']
    span_y = bounds['max_y'] - bounds['min_y']
    span_z = bounds['max_z'] - bounds['min_z']

    # Projected file layout: 64-byte header + vertices + uint32 indices.
    header_size = 64
    vertex_size = point_count * bytes_per_vertex
    index_size = triangle_count * 3 * 4
    vertex_size_mb = vertex_size / bytes_per_mb
    index_size_mb = index_size / bytes_per_mb
    total_size_mb = (header_size + vertex_size + index_size) / bytes_per_mb

    vertex_savings_pct = (1 - vertex_size_mb / original_vertex_size_mb) * 100
    total_savings_pct = (1 - total_size_mb / original_total_size_mb) * 100

    rule = '=' * 70
    print(f"\n{rule}")
    print(f"{name}")
    print(rule)
    print(f"Bytes per vertex: {bytes_per_vertex}")
    print(f"\nVertex data: {vertex_size_mb:.2f} MB (vs {original_vertex_size_mb:.2f} MB original)")
    print(f" Vertex savings: {vertex_savings_pct:.1f}%")
    print(f"Index data: {index_size_mb:.2f} MB (unchanged)")
    print(f"Total file size: {total_size_mb:.2f} MB (vs {original_total_size_mb:.2f} MB original)")
    print(f" Total savings: {total_savings_pct:.1f}%")
    print(f"\nTerrain spans: X={span_x:.2f}m, Y={span_y:.2f}m, Z={span_z:.2f}m")

    sections = (
        ("X-axis", 'x'),
        ("Y-axis", 'y'),
        ("Z-axis", 'z'),
        ("3D Euclidean", 'euclidean'),
    )
    for label, key in sections:
        stats = errors[key]
        # The heading names the unit actually printed below (values are
        # scaled by 100 and shown in cm), not the unit of the raw errors.
        print(f"\n{label} errors (cm):")
        print(f" Mean: {stats['mean']*100:.4f} cm")
        print(f" Median: {stats['median']*100:.4f} cm")
        print(f" Max: {stats['max']*100:.4f} cm")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: measure quantization error for one file.

    Reads the .mound file named on the command line, evaluates the
    uint16+uint16+uint8 and uint16+uint16+uint16 schemes, prints their
    statistics and, when ``--write-quantized`` is given, writes both
    quantized variants next to the input and reports their actual sizes.

    Exits with status 1 when no input file is given, the file does not
    exist, or it cannot be parsed.
    """
    args = sys.argv[1:]
    write_files = '--write-quantized' in args
    # Take the first non-flag argument as the input so the flag may appear
    # before or after the filename.
    positional = [arg for arg in args if not arg.startswith('--')]

    if not positional:
        print("Usage: python mound_quantize.py input.mound [--write-quantized]")
        sys.exit(1)

    input_file = positional[0]
    base_path = Path(input_file)

    if not base_path.exists():
        print(f"Error: Input file '{input_file}' not found")
        sys.exit(1)

    print(f"Reading {input_file}...")
    try:
        header, vertices, indices = read_mound(input_file)
    except (OSError, ValueError, struct.error) as exc:
        # Bad magic, truncated data or an unreadable path: report it as a
        # normal CLI error instead of a traceback.
        print(f"Error: could not read '{input_file}': {exc}")
        sys.exit(1)

    bounds = header['bounds']
    point_count = header['point_count']
    triangle_count = header['triangle_count']
    bytes_per_mb = 1024 * 1024

    original_total_size = base_path.stat().st_size / bytes_per_mb
    original_vertex_size = (point_count * 12) / bytes_per_mb  # 12 bytes per vertex (3 float32)
    original_index_size = (triangle_count * 12) / bytes_per_mb  # 12 bytes per triangle (3 uint32)

    print(f"\nOriginal file:")
    print(f" Points: {point_count:,}")
    print(f" Triangles: {triangle_count:,}")
    print(f" Vertex data: {original_vertex_size:.2f} MB")
    print(f" Index data: {original_index_size:.2f} MB")
    print(f" Total size: {original_total_size:.2f} MB")
    print(f" Format: float32 (12 bytes/vertex)")

    print("\n" + "="*70)
    print("Testing quantization schemes...")
    print("="*70)

    # Test uint16 + uint16 + uint8
    reconstructed_5, quant_5 = quantize_u16_u16_u8(vertices, bounds)
    errors_5 = compute_errors(vertices, reconstructed_5)
    print_stats("uint16 + uint16 + uint8", errors_5, bounds, 5,
                original_vertex_size, original_total_size,
                point_count, triangle_count)

    # Test uint16 + uint16 + uint16
    reconstructed_6, quant_6 = quantize_u16_u16_u16(vertices, bounds)
    errors_6 = compute_errors(vertices, reconstructed_6)
    print_stats("uint16 + uint16 + uint16", errors_6, bounds, 6,
                original_vertex_size, original_total_size,
                point_count, triangle_count)

    # Write quantized files if requested
    if write_files:
        output_5 = base_path.with_suffix('.u16u16u8.mound')
        output_6 = base_path.with_suffix('.u16u16u16.mound')

        print(f"\nWriting quantized files...")
        write_quantized_mound(output_5, header, quant_5, indices, 'u16_u16_u8')
        print(f" {output_5}")

        write_quantized_mound(output_6, header, quant_6, indices, 'u16_u16_u16')
        print(f" {output_6}")

        # Verify file sizes
        size_5 = output_5.stat().st_size / bytes_per_mb
        size_6 = output_6.stat().st_size / bytes_per_mb
        print(f"\nActual file sizes:")
        print(f" uint16+uint16+uint8: {size_5:.2f} MB")
        print(f" uint16+uint16+uint16: {size_6:.2f} MB")

    print("\nDone!")
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user