The Problem with Naive Postgres for IoT
A single industrial plant can generate 50,000+ metric inserts per minute from PLCs, sensors, and SCADA systems. Standard Postgres with a naive metrics table will:
- Bloat indexes to hundreds of GBs within weeks
- Slow range queries to seconds
- Make retention policies a manual nightmare
TimescaleDB solves all three — without leaving the Postgres ecosystem.
Schema Design
```sql
CREATE TABLE metrics (
    time       TIMESTAMPTZ       NOT NULL,
    device_id  TEXT              NOT NULL,
    tag        TEXT              NOT NULL,
    value      DOUBLE PRECISION  NOT NULL,
    quality    SMALLINT          DEFAULT 192
);

SELECT create_hypertable('metrics', 'time',
    chunk_time_interval => INTERVAL '1 hour');

-- Partition also by device for large fleets
SELECT add_dimension('metrics', 'device_id', number_partitions => 8);

-- Compression (critical for storage efficiency)
ALTER TABLE metrics SET (
    timescaledb.compress,
    timescaledb.compress_segmentby = 'device_id, tag',
    timescaledb.compress_orderby   = 'time DESC'
);

SELECT add_compression_policy('metrics', INTERVAL '7 days');
SELECT add_retention_policy('metrics', INTERVAL '90 days');
```
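With this schema in place, a typical range query might look like the sketch below (the device and tag names are hypothetical). Time-based chunk exclusion plus the `device_id` space partition keeps the scan to a handful of one-hour chunks, and on compressed chunks the `segmentby`/`orderby` settings match this access pattern:

```sql
-- Last six hours of one signal from one device (hypothetical names)
SELECT time, value
FROM metrics
WHERE device_id = 'plc-01'
  AND tag = 'line3.temperature'
  AND time > NOW() - INTERVAL '6 hours'
ORDER BY time DESC;
```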
Go Ingestion Pipeline
Batching is the single most important optimization. Never insert row by row.
```go
// Metric mirrors the columns of the metrics hypertable.
type Metric struct {
	Time     time.Time
	DeviceID string
	Tag      string
	Value    float64
	Quality  int16
}

type MetricBatch struct {
	mu      sync.Mutex
	items   []Metric
	maxSize int
	flush   func([]Metric) error
	ticker  *time.Ticker
}

func NewMetricBatch(maxSize int, interval time.Duration, flush func([]Metric) error) *MetricBatch {
	b := &MetricBatch{
		maxSize: maxSize,
		flush:   flush,
		ticker:  time.NewTicker(interval),
	}
	go b.run()
	return b
}

func (b *MetricBatch) Add(m Metric) {
	b.mu.Lock()
	b.items = append(b.items, m)
	shouldFlush := len(b.items) >= b.maxSize
	b.mu.Unlock()
	if shouldFlush {
		b.Flush()
	}
}

func (b *MetricBatch) run() {
	for range b.ticker.C {
		b.Flush()
	}
}

func (b *MetricBatch) Flush() {
	b.mu.Lock()
	if len(b.items) == 0 {
		b.mu.Unlock()
		return
	}
	batch := b.items
	b.items = nil
	b.mu.Unlock()
	if err := b.flush(batch); err != nil {
		log.Printf("flush error: %v", err)
	}
}
```
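The size trigger is easy to exercise in isolation. Below is a stripped-down, self-contained sketch of just that path (the ticker is dropped so the behavior is deterministic; the names `sizeBatch` and `flushSizes` are invented for the demo): seven adds against a threshold of three produce flushes of 3, 3, and 1.

```go
package main

import (
	"fmt"
	"sync"
)

// sketchMetric stands in for Metric; only batching behavior matters here.
type sketchMetric struct{ value float64 }

// sizeBatch is a size-only reduction of MetricBatch, with the same
// lock-append-check-unlock sequence in Add and swap-under-lock in Flush.
type sizeBatch struct {
	mu      sync.Mutex
	items   []sketchMetric
	maxSize int
	flush   func([]sketchMetric)
}

func (b *sizeBatch) Add(m sketchMetric) {
	b.mu.Lock()
	b.items = append(b.items, m)
	full := len(b.items) >= b.maxSize
	b.mu.Unlock()
	if full {
		b.Flush()
	}
}

func (b *sizeBatch) Flush() {
	b.mu.Lock()
	if len(b.items) == 0 {
		b.mu.Unlock()
		return
	}
	batch := b.items
	b.items = nil // hand off the slice; a fresh one accumulates next
	b.mu.Unlock()
	b.flush(batch)
}

// flushSizes feeds n metrics through a batcher with the given threshold
// and returns the size of each flushed batch.
func flushSizes(n, maxSize int) []int {
	var sizes []int
	b := &sizeBatch{maxSize: maxSize, flush: func(ms []sketchMetric) {
		sizes = append(sizes, len(ms))
	}}
	for i := 0; i < n; i++ {
		b.Add(sketchMetric{value: float64(i)})
	}
	b.Flush() // drain the remainder, as a shutdown path would
	return sizes
}

func main() {
	fmt.Println(flushSizes(7, 3)) // [3 3 1]
}
```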
```go
func insertBatch(ctx context.Context, db *pgxpool.Pool, batch []Metric) error {
	rows := make([][]interface{}, len(batch))
	for i, m := range batch {
		rows[i] = []interface{}{m.Time, m.DeviceID, m.Tag, m.Value, m.Quality}
	}
	_, err := db.CopyFrom(ctx,
		pgx.Identifier{"metrics"},
		[]string{"time", "device_id", "tag", "value", "quality"},
		pgx.CopyFromRows(rows),
	)
	return err
}
```
`pgx.CopyFrom` uses the Postgres `COPY` protocol — it's 5-10x faster than bulk `INSERT` for large batches.
Continuous Aggregates for Dashboards
Don't query raw data for dashboards. Use continuous aggregates:
```sql
CREATE MATERIALIZED VIEW metrics_1min
WITH (timescaledb.continuous) AS
SELECT
    time_bucket('1 minute', time) AS bucket,
    device_id,
    tag,
    AVG(value) AS avg_value,
    MIN(value) AS min_value,
    MAX(value) AS max_value,
    COUNT(*)   AS sample_count
FROM metrics
GROUP BY bucket, device_id, tag
WITH NO DATA;

SELECT add_continuous_aggregate_policy('metrics_1min',
    start_offset      => INTERVAL '1 hour',
    end_offset        => INTERVAL '1 minute',
    schedule_interval => INTERVAL '1 minute'
);
```
Dashboard queries now hit the materialized view instead of billions of raw rows.
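For example, a panel showing the last hour of one signal could read from the view like this (device and tag names are hypothetical):

```sql
-- One hour of 1-minute rollups for a single signal (hypothetical names)
SELECT bucket, avg_value, min_value, max_value
FROM metrics_1min
WHERE device_id = 'plc-01'
  AND tag = 'line3.temperature'
  AND bucket > NOW() - INTERVAL '1 hour'
ORDER BY bucket;
```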
Key Takeaways
- Hypertables + compression cut storage by 90%+ vs raw Postgres
- COPY protocol via pgx for ingestion, not INSERT
- Batch size 1000-5000 rows at 1-5 second intervals is the sweet spot
- Continuous aggregates are non-negotiable for any dashboard over raw IoT data