Batch Image Processing Workflows: Scale Your Optimization
Process thousands of images efficiently with batch workflows. Learn command-line tools, scripting techniques, and cloud services for large-scale image optimization.
When you’re dealing with hundreds or thousands of images, manual optimization isn’t feasible. This guide covers efficient batch processing workflows using command-line tools, scripts, and cloud services.
Batch Processing Fundamentals
The Batch Processing Pipeline
┌──────────────┐     ┌──────────────┐     ┌──────────────┐     ┌──────────────┐
│    Input     │────▶│   Process    │────▶│   Validate   │────▶│    Output    │
│   (Source)   │     │ (Transform)  │     │     (QA)     │     │ (Optimized)  │
└──────────────┘     └──────────────┘     └──────────────┘     └──────────────┘
                            │
             ┌──────────────┼──────────────┐
             ▼              ▼              ▼
         [Resize]      [Convert]      [Compress]
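In code, each stage maps to a small, composable step. Here is a minimal Node.js sketch of the pipeline, assuming sharp is installed; the validation step is a stand-in for whatever QA rules you actually apply:

// pipeline-sketch.js — minimal sketch of the four stages (assumes sharp)
const sharp = require('sharp');
const fs = require('fs').promises;

async function runPipeline(inputPath, outputPath) {
  // Process: resize, convert, and compress in a single transform
  await sharp(inputPath)
    .resize(2000, null, { withoutEnlargement: true })
    .webp({ quality: 80 })
    .toFile(outputPath);

  // Validate: a stand-in QA rule — the output must exist and be smaller
  const [inStat, outStat] = await Promise.all([
    fs.stat(inputPath),
    fs.stat(outputPath)
  ]);
  if (outStat.size >= inStat.size) {
    throw new Error(`No size reduction for ${inputPath}`);
  }
  return { input: inStat.size, output: outStat.size };
}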
Key Metrics
| Metric | Target | Why It Matters |
|---|---|---|
| Throughput | 100+ images/min | Processing speed |
| Memory usage | < 4GB | Stability |
| Error rate | < 0.1% | Reliability |
| Size reduction | 40-70% | Effectiveness |
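These targets are straightforward to measure. A rough harness sketch, assuming a processImage(file) function like the runPipeline above that resolves to input/output byte counts (the names are illustrative):

// measure.js — rough sketch for throughput, error rate, and size reduction
async function measure(files, processImage) {
  let errors = 0, inputBytes = 0, outputBytes = 0;
  const start = Date.now();
  for (const file of files) {
    try {
      // processImage is assumed to resolve to { input, output } byte counts
      const { input, output } = await processImage(file);
      inputBytes += input;
      outputBytes += output;
    } catch {
      errors++;
    }
  }
  const minutes = (Date.now() - start) / 60000;
  console.log(`Throughput: ${(files.length / minutes).toFixed(1)} images/min`);
  console.log(`Error rate: ${((errors / files.length) * 100).toFixed(2)}%`);
  console.log(`Size reduction: ${(100 - (outputBytes / inputBytes) * 100).toFixed(1)}%`);
}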
Command-Line Tools
ImageMagick Batch Operations
These examples use ImageMagick 6 syntax; on ImageMagick 7, prefix each command with magick (for example, magick mogrify) and use magick in place of convert.
# Convert all JPEGs to WebP
mogrify -format webp -quality 80 *.jpg
# Resize all images to max 2000px width
mogrify -resize '2000x>' *.jpg
# Batch convert with custom output directory
for f in *.jpg; do
convert "$f" -quality 80 -resize '1600x>' "output/${f%.jpg}.webp"
done
# Parallel processing with GNU Parallel
find . -name "*.jpg" | parallel -j 8 convert {} -quality 80 {.}.webp
# Strip metadata from all images
mogrify -strip *.jpg
# Create thumbnails (the thumbnails/ directory must already exist)
mogrify -path thumbnails -resize 200x200^ -gravity center -extent 200x200 *.jpg
Sharp CLI (Node.js)
Install globally:
npm install -g sharp-cli
Batch operations:
# Convert to WebP
sharp --input "*.jpg" --output output/ --format webp --quality 80
# Generate multiple sizes
for size in 320 640 1024 1600; do
sharp --input "*.jpg" --output "output/${size}/" resize $size
done
libvips (vips command)
libvips is typically the fastest of these tools for large batches:
# Convert to WebP
for f in *.jpg; do
vips copy "$f" "${f%.jpg}.webp[Q=80]"
done
# Thumbnail generation (very fast)
for f in *.jpg; do
vips thumbnail "$f" "thumbs/${f%.jpg}.webp" 200 --height 200 --crop centre
done
# Parallel with xargs (strip the path and extension so outputs land flat in thumbs/)
find . -name "*.jpg" -print0 | xargs -0 -P 8 -I {} \
  sh -c 'vips thumbnail "$1" "thumbs/$(basename "$1" .jpg).webp" 200' _ {}
Format-Specific Tools
# JPEG optimization (jpegoptim)
jpegoptim --max=80 --strip-all --all-progressive *.jpg
# PNG optimization (pngquant + optipng)
# Note: --ext .png --force overwrites the original files
pngquant --quality=65-80 --ext .png --force *.png
optipng -o5 *.png
# WebP conversion (cwebp)
for f in *.jpg; do cwebp -q 80 "$f" -o "${f%.jpg}.webp"; done
# AVIF conversion (avifenc)
for f in *.jpg; do avifenc --min 20 --max 30 "$f" "${f%.jpg}.avif"; done
# SVG optimization (svgo)
svgo -f ./svgs/ -o ./svgs-optimized/
Node.js Batch Processing
Basic Sharp Script
// batch-optimize.js
const sharp = require('sharp');
const fs = require('fs').promises;
const path = require('path');
const { glob } = require('glob');
const CONFIG = {
inputDir: './input',
outputDir: './output',
formats: ['webp', 'avif'],
sizes: [320, 640, 1024, 1600, 2400],
quality: { webp: 80, avif: 65, jpeg: 80 }
};
async function processImage(inputPath) {
const filename = path.basename(inputPath, path.extname(inputPath));
const image = sharp(inputPath);
const metadata = await image.metadata();
const tasks = [];
for (const width of CONFIG.sizes) {
// Skip sizes larger than original
if (width > metadata.width) continue;
for (const format of CONFIG.formats) {
const outputPath = path.join(
CONFIG.outputDir,
`${filename}-${width}.${format}`
);
tasks.push(
sharp(inputPath)
.resize(width)
.toFormat(format, { quality: CONFIG.quality[format] })
.toFile(outputPath)
.then(() => console.log(`Created: ${outputPath}`))
);
}
}
await Promise.all(tasks);
}
async function main() {
await fs.mkdir(CONFIG.outputDir, { recursive: true });
const images = await glob(`${CONFIG.inputDir}/**/*.{jpg,jpeg,png}`);
console.log(`Processing ${images.length} images...`);
// Process in batches to avoid memory issues
const batchSize = 10;
for (let i = 0; i < images.length; i += batchSize) {
const batch = images.slice(i, i + batchSize);
await Promise.all(batch.map(processImage));
console.log(`Progress: ${Math.min(i + batchSize, images.length)}/${images.length}`);
}
console.log('Done!');
}
main().catch(console.error);
Advanced Pipeline with Streams
// stream-processor.js
const sharp = require('sharp');
const fs = require('fs');
const path = require('path');
const { pipeline } = require('stream/promises');
class ImageProcessor {
  constructor(options = {}) {
    this.concurrency = options.concurrency || 4;
  }
  async process(inputPath, outputConfigs) {
    // One sharp instance receives the input; clone() fans the decoded
    // image out to an independent pipeline per output configuration
    const input = sharp();
    const tasks = outputConfigs.map((config) =>
      pipeline(
        input
          .clone()
          .resize(config.width, config.height, { fit: 'inside' })
          .toFormat(config.format, { quality: config.quality }),
        fs.createWriteStream(config.outputPath)
      )
    );
    // Start the input flowing after all clones are attached
    fs.createReadStream(inputPath).pipe(input);
    await Promise.all(tasks);
  }
  async processDirectory(inputDir, outputDir, configs) {
    fs.mkdirSync(outputDir, { recursive: true });
    const files = fs.readdirSync(inputDir)
      .filter(f => /\.(jpg|jpeg|png)$/i.test(f));
    // Process files in chunks of `concurrency` to bound memory use
    for (let i = 0; i < files.length; i += this.concurrency) {
      const chunk = files.slice(i, i + this.concurrency);
      await Promise.all(chunk.map(async (file) => {
        const inputPath = path.join(inputDir, file);
        const baseName = path.basename(file, path.extname(file));
        const outputConfigs = configs.map(c => ({
          ...c,
          outputPath: path.join(outputDir, `${baseName}-${c.width}.${c.format}`)
        }));
        await this.process(inputPath, outputConfigs);
        console.log(`Processed: ${file}`);
      }));
    }
  }
}
// Usage
const processor = new ImageProcessor({ concurrency: 4 });
processor.processDirectory('./input', './output', [
{ width: 640, format: 'webp', quality: 80 },
{ width: 1280, format: 'webp', quality: 80 },
{ width: 640, format: 'avif', quality: 65 },
{ width: 1280, format: 'avif', quality: 65 }
]);
Worker Threads for CPU-Intensive Processing
// worker-pool.js
const { Worker, isMainThread, parentPort, workerData } = require('worker_threads');
const os = require('os');
const path = require('path');
if (isMainThread) {
// Main thread - distribute work
class WorkerPool {
constructor(workerScript, numWorkers = os.cpus().length) {
this.workers = [];
this.queue = [];
this.activeWorkers = 0;
for (let i = 0; i < numWorkers; i++) {
  const worker = new Worker(workerScript);
  worker.on('message', (result) => this.handleResult(worker, result));
  // Surface worker crashes instead of leaving the promise hanging
  worker.on('error', (err) => this.handleResult(worker, { error: err.message }));
  this.workers.push({ worker, busy: false });
}
}
process(task) {
return new Promise((resolve, reject) => {
this.queue.push({ task, resolve, reject });
this.runNext();
});
}
runNext() {
const availableWorker = this.workers.find(w => !w.busy);
if (!availableWorker || this.queue.length === 0) return;
const { task, resolve, reject } = this.queue.shift();
availableWorker.busy = true;
availableWorker.resolve = resolve;
availableWorker.reject = reject;
availableWorker.worker.postMessage(task);
}
handleResult(worker, result) {
  const workerInfo = this.workers.find(w => w.worker === worker);
  workerInfo.busy = false;
  if (result.error) {
    // A worker may error while idle, so guard before rejecting
    if (workerInfo.reject) workerInfo.reject(new Error(result.error));
  } else {
    workerInfo.resolve(result);
  }
  this.runNext();
}
async processAll(tasks) {
  return Promise.all(tasks.map(task => this.process(task)));
}
async destroy() {
  // Terminate all workers so the Node.js process can exit cleanly
  await Promise.all(this.workers.map(w => w.worker.terminate()));
}
}
module.exports = WorkerPool;
} else {
// Worker thread - process individual images
const sharp = require('sharp');
parentPort.on('message', async (task) => {
try {
await sharp(task.input)
.resize(task.width)
.toFormat(task.format, { quality: task.quality })
.toFile(task.output);
parentPort.postMessage({ success: true, output: task.output });
} catch (error) {
parentPort.postMessage({ error: error.message });
}
});
}
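A minimal driver sketch for the pool above, assuming it is saved as worker-pool.js; the file names and sizes here are illustrative:

// run-pool.js — illustrative driver for the worker pool above
const path = require('path');
const WorkerPool = require('./worker-pool');

async function main() {
  const pool = new WorkerPool(path.resolve(__dirname, 'worker-pool.js'));
  const results = await pool.processAll([
    { input: 'photo.jpg', width: 640, format: 'webp', quality: 80, output: 'photo-640.webp' },
    { input: 'photo.jpg', width: 1280, format: 'webp', quality: 80, output: 'photo-1280.webp' }
  ]);
  console.log(results);
  await pool.destroy(); // terminate workers so the process can exit
}

main().catch(console.error);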
Python Batch Processing
Pillow (PIL) Script
# batch_optimize.py
from PIL import Image
import os
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
CONFIG = {
'input_dir': './input',
'output_dir': './output',
'sizes': [320, 640, 1024, 1600],
'quality': 80,
'formats': ['webp', 'jpeg']
}
def optimize_image(input_path, output_dir, sizes, quality):
"""Process a single image into multiple sizes and formats."""
results = []
img = Image.open(input_path)
filename = Path(input_path).stem
# Flatten transparency onto white (required for JPEG; note this also drops alpha from the WebP output)
if img.mode in ('RGBA', 'P'):
rgb_img = Image.new('RGB', img.size, (255, 255, 255))
if img.mode == 'RGBA':
rgb_img.paste(img, mask=img.split()[3])
else:
rgb_img.paste(img)
img = rgb_img
for width in sizes:
if width > img.width:
continue
# Calculate height maintaining aspect ratio
ratio = width / img.width
height = int(img.height * ratio)
resized = img.resize((width, height), Image.LANCZOS)
for fmt in CONFIG['formats']:
output_path = os.path.join(output_dir, f"{filename}-{width}.{fmt}")
if fmt == 'webp':
resized.save(output_path, 'WEBP', quality=quality)
elif fmt == 'jpeg':
resized.save(output_path, 'JPEG', quality=quality, optimize=True)
results.append(output_path)
return results
def batch_process(input_dir, output_dir, max_workers=4):
"""Process all images in input directory."""
os.makedirs(output_dir, exist_ok=True)
image_files = list(Path(input_dir).glob('*.jpg')) + \
list(Path(input_dir).glob('*.jpeg')) + \
list(Path(input_dir).glob('*.png'))
print(f"Processing {len(image_files)} images with {max_workers} workers...")
with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = {
executor.submit(
optimize_image,
str(img),
output_dir,
CONFIG['sizes'],
CONFIG['quality']
): img for img in image_files
}
for future in as_completed(futures):
img = futures[future]
try:
results = future.result()
print(f"✓ {img.name} -> {len(results)} variants")
except Exception as e:
print(f"✗ {img.name}: {e}")
if __name__ == '__main__':
batch_process(CONFIG['input_dir'], CONFIG['output_dir'])
Cloud-Based Batch Processing
Sirv Bulk Upload and Processing
Sirv provides powerful batch processing capabilities:
// sirv-batch-upload.js
const fetch = require('node-fetch');
const fs = require('fs');
const path = require('path');
class SirvBatchProcessor {
constructor(clientId, clientSecret) {
this.clientId = clientId;
this.clientSecret = clientSecret;
this.token = null;
}
async authenticate() {
const response = await fetch('https://api.sirv.com/v2/token', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
clientId: this.clientId,
clientSecret: this.clientSecret
})
});
const data = await response.json();
this.token = data.token;
return this.token;
}
async uploadImage(localPath, remotePath) {
const fileBuffer = fs.readFileSync(localPath);
const response = await fetch(
`https://api.sirv.com/v2/files/upload?filename=${encodeURIComponent(remotePath)}`,
{
method: 'POST',
headers: {
'Authorization': `Bearer ${this.token}`,
'Content-Type': 'application/octet-stream'
},
body: fileBuffer
}
);
return response.ok;
}
async processDirectory(localDir, remoteDir) {
await this.authenticate();
const files = fs.readdirSync(localDir)
.filter(f => /\.(jpg|jpeg|png|gif)$/i.test(f));
console.log(`Uploading ${files.length} files to Sirv...`);
for (const file of files) {
const localPath = path.join(localDir, file);
const remotePath = `${remoteDir}/${file}`;
const success = await this.uploadImage(localPath, remotePath);
console.log(`${success ? '✓' : '✗'} ${file}`);
}
}
}
// Usage
const processor = new SirvBatchProcessor(
process.env.SIRV_CLIENT_ID,
process.env.SIRV_CLIENT_SECRET
);
processor.processDirectory('./images', '/products').catch(console.error);
Once uploaded to Sirv, images are automatically optimized and served with on-the-fly transformations:
<!-- Sirv handles all optimization automatically -->
<img src="https://your-account.sirv.com/products/photo.jpg?w=800&format=optimal" alt="Product photo">
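Because transformations are plain query parameters, generating responsive variants is just string building. A small sketch using the w and format parameters from the example above (buildSirvUrl is our own hypothetical helper):

// Hypothetical helper for building Sirv transformation URLs
function buildSirvUrl(base, params) {
  return `${base}?${new URLSearchParams(params)}`;
}

// Generate a srcset of on-the-fly resized variants
const base = 'https://your-account.sirv.com/products/photo.jpg';
const srcset = [320, 640, 1024, 1600]
  .map(w => `${buildSirvUrl(base, { w, format: 'optimal' })} ${w}w`)
  .join(', ');
console.log(srcset);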
Sirv AI Studio for Batch Processing
For advanced batch operations like background removal, Sirv AI Studio offers:
- Bulk background removal
- Automatic color correction
- Smart cropping
- Product image enhancement
Access via API:
// Remove backgrounds from multiple images
const apiKey = process.env.SIRV_STUDIO_API_KEY; // your AI Studio API key
const images = ['product1.jpg', 'product2.jpg', 'product3.jpg'];
for (const image of images) {
const result = await fetch('https://api.sirv.studio/v1/remove-background', {
method: 'POST',
headers: {
'Authorization': `Bearer ${apiKey}`,
'Content-Type': 'application/json'
},
body: JSON.stringify({
image_url: `https://your-account.sirv.com/products/${image}`
})
});
// Processed image URL returned
}
Performance Optimization
Memory Management
// Process in controlled batches to avoid memory issues
async function processBatched(files, batchSize = 5) {
for (let i = 0; i < files.length; i += batchSize) {
const batch = files.slice(i, i + batchSize);
await Promise.all(batch.map(async (file) => {
  const image = sharp(file);
  // getOutputPath: your own helper mapping input paths to output paths
  await image.toFile(getOutputPath(file));
  // Destroy the sharp stream to release its buffers promptly
  image.destroy();
}));
// Garbage collection hint (only available when run with node --expose-gc)
if (global.gc) global.gc();
console.log(`Processed ${Math.min(i + batchSize, files.length)}/${files.length}`);
}
}
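sharp also exposes global settings worth knowing for long-running batch jobs; a brief sketch (the values are starting points, not prescriptions):

const sharp = require('sharp');

// Disable sharp's operation cache; it trades memory for speed, which
// rarely pays off when each input file is visited exactly once
sharp.cache(false);

// Cap libvips' internal thread pool so that, combined with your batch
// size, total CPU and memory usage stays predictable
sharp.concurrency(4);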
Progress Reporting
const cliProgress = require('cli-progress');
async function processWithProgress(files) {
const bar = new cliProgress.SingleBar({
format: 'Progress |{bar}| {percentage}% | {value}/{total} | {file}',
barCompleteChar: '█',
barIncompleteChar: '░'
});
bar.start(files.length, 0, { file: '' });
for (let i = 0; i < files.length; i++) {
await processImage(files[i]);
bar.update(i + 1, { file: path.basename(files[i]) });
}
bar.stop();
}
Conclusion
Effective batch processing requires:
- Right tool for the job - Sharp/libvips for speed, ImageMagick for versatility
- Parallel processing - Use multiple cores/workers
- Memory management - Process in batches, destroy instances
- Error handling - Log failures, continue processing
- Progress tracking - Know where you are in large jobs
For enterprise-scale processing, consider cloud services like Sirv that handle optimization automatically and offer AI-powered features through Sirv AI Studio.