Building Scalable APIs
How we built a Google Sheets alternative using Conflict-free Replicated Data Types for seamless collaboration.
The Challenge
When multiple users edit the same spreadsheet simultaneously, conflicts are inevitable. Traditional approaches use operational transformation or locking mechanisms, but both have significant drawbacks.
We wanted real-time collaboration that:
- Works offline and syncs when reconnected
- Has no server as single source of truth
- Resolves conflicts automatically without user intervention
- Maintains consistency across all clients
Enter CRDTs (Conflict-free Replicated Data Types).
What Are CRDTs?
CRDTs are data structures designed for distributed systems where different replicas can be updated independently and concurrently, then merged automatically without conflicts.
Two Types of CRDTs
State-based (CvRDTs):
- Send entire state to other replicas
- Merge states using deterministic merge function
- Higher bandwidth but simpler logic
Operation-based (CmRDTs):
- Send operations (commands) to other replicas
- Apply operations in causal order
- Lower bandwidth but requires reliable delivery
We chose operation-based CRDTs for efficiency.
Modeling a Spreadsheet with CRDTs
Cell Values: Last-Write-Wins Register
For cell values, we use LWW-Register (Last-Write-Wins):
class LWWRegister {
constructor() {
this.value = null
this.timestamp = 0
this.replicaId = null
}
set(value, timestamp, replicaId) {
if (timestamp > this.timestamp ||
(timestamp === this.timestamp && replicaId > this.replicaId)) {
this.value = value
this.timestamp = timestamp
this.replicaId = replicaId
}
}
get() {
return this.value
}
}
Rows/Columns: RGA (Replicated Growable Array)
For maintaining row and column order, we use RGA:
class RGA {
constructor(replicaId) {
this.replicaId = replicaId
this.items = []
this.tombstones = new Set() // Track deletions
}
insert(afterId, value) {
const id = {
replicaId: this.replicaId,
clock: Date.now(),
value
}
if (!afterId) {
this.items.unshift(id)
} else {
const index = this.items.findIndex(item => item.id === afterId)
this.items.splice(index + 1, 0, id)
}
return id
}
delete(id) {
this.tombstones.add(JSON.stringify(id))
}
toArray() {
return this.items
.filter(item => !this.tombstones.has(JSON.stringify(item.id)))
.map(item => item.value)
}
}
Multi-User Selection: OR-Set
Track which cells each user has selected:
class ORSet {
constructor() {
this.adds = new Map() // element -> Set of unique tags
this.removes = new Set() // removed tags
}
add(element, tag) {
if (!this.adds.has(element)) {
this.adds.set(element, new Set())
}
this.adds.get(element).add(tag)
}
remove(element) {
const tags = this.adds.get(element)
if (tags) {
tags.forEach(tag => this.removes.add(tag))
}
}
has(element) {
const tags = this.adds.get(element)
if (!tags) return false
return Array.from(tags).some(tag => !this.removes.has(tag))
}
}
Implementing the Spreadsheet
Data Structure
class CollaborativeSpreadsheet {
constructor(replicaId) {
this.replicaId = replicaId
this.rows = new RGA(replicaId)
this.columns = new RGA(replicaId)
this.cells = new Map() // "row:col" -> LWWRegister
this.selections = new Map() // userId -> ORSet
}
setCellValue(row, col, value) {
const key = `${row}:${col}`
if (!this.cells.has(key)) {
this.cells.set(key, new LWWRegister())
}
const timestamp = Date.now()
this.cells.get(key).set(value, timestamp, this.replicaId)
// Broadcast operation to other replicas
this.broadcast({
type: 'SET_CELL',
row,
col,
value,
timestamp,
replicaId: this.replicaId
})
}
getCellValue(row, col) {
const key = `${row}:${col}`
const register = this.cells.get(key)
return register ? register.get() : null
}
insertRow(afterRow) {
const id = this.rows.insert(afterRow, { id: generateId() })
this.broadcast({
type: 'INSERT_ROW',
afterRow,
id
})
return id
}
deleteRow(rowId) {
this.rows.delete(rowId)
this.broadcast({
type: 'DELETE_ROW',
rowId
})
}
}
Handling Remote Operations
class CollaborativeSpreadsheet {
// ... previous code
applyRemoteOperation(op) {
switch (op.type) {
case 'SET_CELL':
const key = `${op.row}:${op.col}`
if (!this.cells.has(key)) {
this.cells.set(key, new LWWRegister())
}
this.cells.get(key).set(op.value, op.timestamp, op.replicaId)
this.render()
break
case 'INSERT_ROW':
this.rows.insert(op.afterRow, op.id)
this.render()
break
case 'DELETE_ROW':
this.rows.delete(op.rowId)
this.render()
break
}
}
}
Networking Layer
WebSocket for Real-Time Sync
class SyncEngine {
constructor(spreadsheet, url) {
this.spreadsheet = spreadsheet
this.ws = new WebSocket(url)
this.operationQueue = []
this.isOnline = false
this.ws.onopen = () => {
this.isOnline = true
this.flushQueue()
}
this.ws.onmessage = (event) => {
const op = JSON.parse(event.data)
this.spreadsheet.applyRemoteOperation(op)
}
this.ws.onclose = () => {
this.isOnline = false
}
// Listen for local operations
this.spreadsheet.on('operation', (op) => {
if (this.isOnline) {
this.ws.send(JSON.stringify(op))
} else {
this.operationQueue.push(op)
}
})
}
flushQueue() {
while (this.operationQueue.length > 0) {
const op = this.operationQueue.shift()
this.ws.send(JSON.stringify(op))
}
}
}
IndexedDB for Offline Storage
class PersistenceLayer {
constructor(dbName) {
this.dbName = dbName
this.db = null
}
async init() {
this.db = await openDB(this.dbName, 1, {
upgrade(db) {
db.createObjectStore('operations', { keyPath: 'id', autoIncrement: true })
db.createObjectStore('state')
}
})
}
async saveOperation(operation) {
await this.db.add('operations', {
...operation,
timestamp: Date.now()
})
}
async loadOperations() {
return await this.db.getAll('operations')
}
async saveState(state) {
await this.db.put('state', state, 'current')
}
async loadState() {
return await this.db.get('state', 'current')
}
}
Optimizations
1. Compression
Operations can be verbose. Use MessagePack or Protobuf:
import msgpack from 'msgpack-lite'
// Before sending
const compressed = msgpack.encode(operation)
ws.send(compressed)
// On receive
ws.onmessage = (event) => {
const operation = msgpack.decode(new Uint8Array(event.data))
spreadsheet.applyRemoteOperation(operation)
}
2. Batching
Don’t send every keystroke individually:
class OperationBatcher {
constructor(sendFn, interval = 100) {
this.sendFn = sendFn
this.interval = interval
this.batch = []
this.timer = null
}
add(operation) {
this.batch.push(operation)
if (!this.timer) {
this.timer = setTimeout(() => {
this.flush()
}, this.interval)
}
}
flush() {
if (this.batch.length > 0) {
this.sendFn(this.batch)
this.batch = []
}
this.timer = null
}
}
3. Garbage Collection
Delete old tombstones after all replicas have seen them:
class GarbageCollector {
constructor(rga, interval = 60000) {
this.rga = rga
this.seenBy = new Map() // tombstone -> Set of replicaIds
setInterval(() => {
this.collect()
}, interval)
}
markSeen(tombstone, replicaId) {
if (!this.seenBy.has(tombstone)) {
this.seenBy.set(tombstone, new Set())
}
this.seenBy.get(tombstone).add(replicaId)
}
collect() {
const allReplicas = getAllReplicaIds()
for (const [tombstone, replicas] of this.seenBy.entries()) {
if (allReplicas.every(id => replicas.has(id))) {
this.rga.tombstones.delete(tombstone)
this.seenBy.delete(tombstone)
}
}
}
}
Challenges We Faced
1. Formula Dependencies
Formulas like =SUM(A1:A10) create dependencies. When A1 changes, the formula must recalculate. We built a dependency graph:
class DependencyGraph {
constructor() {
this.dependencies = new Map() // cell -> Set of dependent cells
}
addDependency(cell, dependsOn) {
for (const dep of dependsOn) {
if (!this.dependencies.has(dep)) {
this.dependencies.set(dep, new Set())
}
this.dependencies.get(dep).add(cell)
}
}
getDependents(cell) {
return this.dependencies.get(cell) || new Set()
}
recalculate(cell, spreadsheet) {
const dependents = this.getDependents(cell)
for (const dependent of dependents) {
const value = evaluateFormula(dependent, spreadsheet)
spreadsheet.setCellValue(dependent.row, dependent.col, value)
this.recalculate(dependent, spreadsheet) // Recursive
}
}
}
2. Undo/Redo with CRDTs
Traditional undo doesn’t work well with CRDTs. We implemented command-based undo:
class UndoManager {
constructor(spreadsheet) {
this.spreadsheet = spreadsheet
this.undoStack = []
this.redoStack = []
}
execute(command) {
command.execute(this.spreadsheet)
this.undoStack.push(command)
this.redoStack = [] // Clear redo stack
}
undo() {
if (this.undoStack.length === 0) return
const command = this.undoStack.pop()
command.undo(this.spreadsheet)
this.redoStack.push(command)
}
redo() {
if (this.redoStack.length === 0) return
const command = this.redoStack.pop()
command.execute(this.spreadsheet)
this.undoStack.push(command)
}
}
class SetCellCommand {
constructor(row, col, newValue, oldValue) {
this.row = row
this.col = col
this.newValue = newValue
this.oldValue = oldValue
}
execute(spreadsheet) {
spreadsheet.setCellValue(this.row, this.col, this.newValue)
}
undo(spreadsheet) {
spreadsheet.setCellValue(this.row, this.col, this.oldValue)
}
}
3. Large Spreadsheets
Rendering 1 million cells is slow. We use virtual scrolling:
class VirtualGrid {
constructor(container, rowCount, colCount, rowHeight, colWidth) {
this.container = container
this.rowCount = rowCount
this.colCount = colCount
this.rowHeight = rowHeight
this.colWidth = colWidth
this.visibleRows = Math.ceil(container.clientHeight / rowHeight)
this.visibleCols = Math.ceil(container.clientWidth / colWidth)
this.scrollTop = 0
this.scrollLeft = 0
this.container.addEventListener('scroll', () => {
this.scrollTop = container.scrollTop
this.scrollLeft = container.scrollLeft
this.render()
})
}
render() {
const startRow = Math.floor(this.scrollTop / this.rowHeight)
const endRow = Math.min(startRow + this.visibleRows, this.rowCount)
const startCol = Math.floor(this.scrollLeft / this.colWidth)
const endCol = Math.min(startCol + this.visibleCols, this.colCount)
// Only render visible cells
for (let row = startRow; row < endRow; row++) {
for (let col = startCol; col < endCol; col++) {
renderCell(row, col)
}
}
}
}
Results
After 6 months of development:
- Latency: < 50ms for local operations
- Sync time: < 100ms for remote operations
- Offline support: Works perfectly offline, syncs when reconnected
- Conflict resolution: Zero manual conflict resolution required
- Scale: Tested with 100+ concurrent users
Lessons Learned
- CRDTs aren’t magic - They solve conflicts automatically but require careful design
- Offline-first is hard - But CRDTs make it much easier than alternatives
- Performance matters - Virtual rendering and batching are essential
- Test conflict scenarios - Network partitions, offline editing, concurrent edits
- Libraries help - Use Yjs or Automerge instead of rolling your own
Resources
- CRDT Papers
- Yjs - CRDT library we ended up using
- Automerge - Another excellent CRDT library
- Conflict-Free Replicated Data Types (paper)