-- UPDATEUPDATE sales_delta
SET price = price * 1.1
WHERE category = 'Electronics';
-- MERGE (UPSERT)MERGE INTO customers_target t
USING customers_source s
ON t.customer_id = s.customer_id
WHEN MATCHED THENUPDATE SET t.email = s.email
WHEN NOT MATCHED THENINSERT (customer_id, email) VALUES (s.customer_id, s.email);
Time Travel
Data historie bekijken:
-- Toon table historyDESCRIBE HISTORY sales_delta;
-- Data van specifieke versieSELECT * FROM sales_delta
VERSION AS OF 5;
-- Data van specifieke timestampSELECT * FROM sales_delta
TIMESTAMP AS OF'2024-01-15 10:30:00';
Optimization & Vacuum
Performance optimalisatie:
-- Z-Ordering voor query performanceOPTIMIZE sales_delta
ZORDER BY customer_id, transaction_date;
-- Complete Unity Catalog setup-- 1. Catalog aanmakenCREATE CATALOG IF NOT EXISTS enterprise_data
COMMENT'Enterprise data catalog voor alle business units';
-- 2. Schemas voor verschillende zonesCREATE SCHEMA enterprise_data.raw
COMMENT'Raw landing zone voor bron systemen';
CREATE SCHEMA enterprise_data.staging
COMMENT'Staging zone voor data transformatie';
CREATE SCHEMA enterprise_data.analytics
COMMENT'Analytics zone voor business reports';
-- 3. Table met managed storageCREATE TABLE enterprise_data.raw.customers (
customer_id BIGINT,
first_name STRING,
last_name STRING,
email STRING,
created_at TIMESTAMP
)
USING delta
COMMENT'Customer master data'TBLPROPERTIES (
'delta.autoOptimize.optimizeWrite' = 'true',
'delta.autoOptimize.autoCompact' = 'true'
);
๐ Spark SQL - Databricks Notebooks
Notebook Basics
Notebook cell commands:
-- Magic commands%sql# SQL cell%python# Python cell%scala# Scala cell%r# R cell%sh# Shell commands%fs# File system commands%md# Markdown documentation
-- Query warehouseSELECT *
FROM system.compute.warehouses
WHERE state = 'RUNNING';
Query History
Monitor query performance:
-- Recent queriesSELECT
query_text,
execution_time,
rows_produced,
user_name,
warehouse_id
FROM system.query.history
WHERE start_time > CURRENT_TIMESTAMP - INTERVAL 1 DAY
ORDER BY execution_time DESCLIMIT 100;
BI Integratie
Connect Power BI/Tableau:
# Connection string voor Power BI
Server: https://<workspace>.cloud.databricks.com
Port: 443
HTTP Path: /sql/1.0/warehouses/<warehouse-id>
Authentication: Personal Access Token
๐ก Performance Tips:
1. Gebruik Photon engine voor snellere queries
2. Configureer auto-scaling voor variabele workloads
3. Gebruik Delta Cache voor herhaalde queries
4. Optimaliseer regulier met OPTIMIZE en ZORDER
5. Monitor performance met Query History
๐ Security & Governance
Row & Column Security
Fine-grained access control:
-- Row filter (dynamic)CREATE ROW FILTER sales_region_filter
ON sales
AS (region) WHERE region IN (
SELECT region FROM user_regions
WHERE user = CURRENT_USER()
);
-- Column mask (PII)CREATE MASK email_mask AS (
CASEWHEN CURRENT_USER() = 'admin@company.com'THEN email
ELSE REGEXP_REPLACE(email, '(.)(.*)@', '***@')
END
);
-- Clone voor backupCREATE TABLE sales_backup
DEEP CLONE sales_production;
-- Restore from versionRESTORE TABLE sales
TO VERSION AS OF 10;
-- Export voor on-prem backup# Delta Lake export
df.write.format("delta")\
.save("abfss://container@storage.dfs.core.windows.net/backup/sales")
๐ Snelle Referentie - Dagelijks Gebruik
-- Meest gebruikte Databricks commands-- 1. Data lezen van Delta tableSELECT * FROM delta.`/mnt/datalake/sales`;
-- 2. Nieuwe Delta table aanmakenCREATE TABLE sales_delta
USING delta
LOCATION'/mnt/datalake/sales'AS SELECT * FROM sales_csv;
-- 3. Time travel querySELECT * FROM sales_delta
TIMESTAMP AS OF'2024-01-15';
-- 4. MERGE (UPSERT) operationMERGE INTO target_table t
USING source_table s ON t.id = s.id
WHEN MATCHED THEN UPDATE SET *
WHEN NOT MATCHED THEN INSERT *;
-- 5. Performance optimalisatieOPTIMIZE sales_delta
ZORDER BY (customer_id, date);
-- 6. Unity Catalog tableCREATE TABLE catalog.schema.table_name
AS SELECT * FROM source_table;
-- 7. MLflow experiment querySELECT * FROM mlflow_artifact
WHERE experiment_id = '123';
Hoe te gebruiken:
1. Print deze pagina als PDF (Ctrl+P โ Save as PDF)
2. Bewaar als referentie naast je Databricks workspace