Optimize gnaw for your specific use case and hardware configuration.
gnaw's performance can be tuned through the gnaw.toml configuration file:
# gnaw.toml - Performance tuning configuration
# Number of lines to buffer before streaming output
chunk_size = 100
# Output format: "ascii_box" or "json"
output_format = "ascii_box"
# Enable color output
color = true
# Color for highlighting matches
match_color = "cyan"
# I/O chunk size for large files (in bytes)
io_chunk_size_bytes = 8388608 # 8 MiB
# Performance tuning for different file sizes
[tuning]
tiny = "64 KiB"
small = "8.6 MiB"
medium = "256 MiB"
large = "2 GiB"
# Regex engine configuration
[regex.simple]
size_limit_mb = 200
dfa_size_limit_mb = 200
nest_limit = 250
[regex.complex]
size_limit_mb = 100
dfa_size_limit_mb = 100
nest_limit = 200
Adjust file size thresholds based on your typical workloads:
[tuning]
tiny = "64 KiB" # Small files: minimal overhead
small = "8.6 MiB" # Medium files: balanced approach
medium = "256 MiB" # Large files: streaming optimization
large = "2 GiB" # Very large files: chunked processing
Optimize I/O operations for your hardware:
# Increase for better disk throughput
io_chunk_size_bytes = 16777216 # 16 MiB
# Decrease for lower memory usage
io_chunk_size_bytes = 4194304 # 4 MiB
# Number of lines to buffer before streaming
chunk_size = 100
# For high-memory systems
chunk_size = 500
# For low-memory systems
chunk_size = 50
For straightforward patterns, use higher limits:
[regex.simple]
size_limit_mb = 200 # Maximum memory for regex engine
dfa_size_limit_mb = 200 # Maximum DFA size
nest_limit = 250 # Maximum nesting depth
For complex regex patterns, use conservative limits:
[regex.complex]
size_limit_mb = 100 # Reduced memory limit
dfa_size_limit_mb = 100 # Reduced DFA size
nest_limit = 200 # Reduced nesting depth
gnaw automatically uses all available CPU cores. You can influence this with environment variables:
# Limit thread count
RAYON_NUM_THREADS=4 gnaw "pattern" file.txt
# Use all cores (default; setting 0 or leaving the variable unset has the same effect)
RAYON_NUM_THREADS=0 gnaw "pattern" file.txt
# High-memory systems (16GB+)
io_chunk_size_bytes = 16777216 # 16 MiB
chunk_size = 500
# Medium-memory systems (8GB)
io_chunk_size_bytes = 8388608 # 8 MiB
chunk_size = 100
# Low-memory systems (4GB)
io_chunk_size_bytes = 4194304 # 4 MiB
chunk_size = 50
# High-core systems (16+ cores)
RAYON_NUM_THREADS=16 gnaw "pattern" file.txt
# Medium-core systems (8 cores)
RAYON_NUM_THREADS=8 gnaw "pattern" file.txt
# Low-core systems (4 cores)
RAYON_NUM_THREADS=4 gnaw "pattern" file.txt
For log file analysis:
# Optimized for log files
[tuning]
tiny = "1 MiB" # Log files are typically larger
small = "10 MiB"
medium = "100 MiB"
large = "1 GiB"
# Streaming for real-time analysis
chunk_size = 50
io_chunk_size_bytes = 4194304 # 4 MiB
For codebase search:
# Optimized for code files
[tuning]
tiny = "32 KiB" # Code files are typically smaller
small = "1 MiB"
medium = "10 MiB"
large = "100 MiB"
# Higher buffering for better context
chunk_size = 200
For very large datasets:
# Aggressive streaming
chunk_size = 25
io_chunk_size_bytes = 2097152 # 2 MiB
# Conservative memory usage
[regex.simple]
size_limit_mb = 50
dfa_size_limit_mb = 50
Enable performance monitoring:
# Debug logging
RUST_LOG=debug gnaw "pattern" file.txt
# Performance profiling
cargo flamegraph --release --bin gnaw -- "pattern" file.txt
Create performance tests:
# Generate test data
cargo run --bin generate_test_logs
# Run benchmarks
./scripts/bench_compare.sh
# View results
cd dashboard
streamlit run streamlit_app.py
# Monitor CPU usage
htop
# Monitor memory usage
free -h
# Monitor I/O
iostat -x 1