Een compleet stappenplan voor het opzetten van Infrastructure as Code (Terraform) voor een Databricks workspace in Azure — van Service Principal tot Unity Catalog en CI/CD pipeline.
| Tool | Versie | Vereist? | Doel |
|---|---|---|---|
| Terraform | ≥ 1.6 | Vereist | IaC engine |
| Azure CLI | ≥ 2.55 | Vereist | Azure authenticatie & beheer |
| Git | ≥ 2.40 | Vereist | Versiebeheer |
| VS Code + Terraform ext. | Laatste | Optioneel | IDE met syntax highlighting |
| Databricks CLI | ≥ 0.200 | Optioneel | Handmatige checks en debugging |
# Verify that the required tools are installed at a supported version
terraform --version   # expect: Terraform v1.6+
az --version          # expect: azure-cli 2.55+
git --version         # expect: git version 2.40+

# Sign in to Azure and select the subscription to deploy into
az login
az account show       # confirm the active subscription is the intended one
az account set --subscription "<jouw-subscription-id>"
# Resource group that holds the Databricks platform resources
RG_NAME="rg-databricks-prod"
LOCATION="westeurope"

az group create \
  --name "${RG_NAME}" \
  --location "${LOCATION}" \
  --tags Environment=Production Project=DataPlatform ManagedBy=Terraform
SP_NAME="sp-terraform-databricks"
SUBSCRIPTION_ID=$(az account show --query id -o tsv)

# Create the service principal with Contributor rights on the resource group.
# --sdk-auth is deliberately not used: it is deprecated and changes the output
# keys to clientId/clientSecret/tenantId, which would not match the mapping below.
az ad sp create-for-rbac \
  --name "$SP_NAME" \
  --role Contributor \
  --scopes "/subscriptions/$SUBSCRIPTION_ID/resourceGroups/$RG_NAME"

# Save the output -- the password is shown only once. You need these values:
#   appId    -> ARM_CLIENT_ID
#   password -> ARM_CLIENT_SECRET
#   tenant   -> ARM_TENANT_ID
# The subscription ID is not part of the output; it is the value queried above:
echo "ARM_SUBSCRIPTION_ID=$SUBSCRIPTION_ID"
KV_NAME="kv-databricks-prod"

az keyvault create \
  --name "$KV_NAME" \
  --resource-group "$RG_NAME" \
  --location "$LOCATION" \
  --enable-rbac-authorization true

# With RBAC authorization enabled, creating the vault does not grant access to
# its secrets. Assign a data-plane role to the signed-in user first, otherwise
# the 'secret set' call below is rejected as Forbidden.
KV_ID=$(az keyvault show --name "$KV_NAME" --query id -o tsv)
USER_OBJECT_ID=$(az ad signed-in-user show --query id -o tsv)

az role assignment create \
  --role "Key Vault Secrets Officer" \
  --assignee-object-id "$USER_OBJECT_ID" \
  --assignee-principal-type User \
  --scope "$KV_ID"

# Store the service principal secret in Key Vault.
# Role assignments can take a short while to propagate; retry if this fails
# with Forbidden right after the assignment.
az keyvault secret set \
  --vault-name "$KV_NAME" \
  --name "terraform-sp-secret" \
  --value "<sp-password>"
# Storage account and container that hold the Terraform remote state
SA_NAME="stterraformstate001"   # must be a globally unique name

az storage account create \
  --name "${SA_NAME}" \
  --resource-group "${RG_NAME}" \
  --location "${LOCATION}" \
  --sku Standard_LRS \
  --min-tls-version TLS1_2

az storage container create \
  --name "tfstate" \
  --account-name "${SA_NAME}"
databricks-iac/
├── main.tf              # Hoofd resources
├── variables.tf         # Input variabelen
├── outputs.tf           # Output waarden
├── providers.tf         # Provider configuratie
├── backend.tf           # Remote state
├── terraform.tfvars     # Waarden (niet committen!)
├── .gitignore
└── modules/
    ├── workspace/       # Databricks workspace module
    ├── unity-catalog/   # Unity Catalog module
    └── clusters/        # Cluster policies module
# Terraform and provider version constraints
terraform {
  required_version = ">= 1.6"

  required_providers {
    databricks = {
      source  = "databricks/databricks"
      version = "~> 1.38"
    }

    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~> 3.90"
    }
  }
}

# Azure provider. Credentials are read from the environment:
# ARM_CLIENT_ID, ARM_CLIENT_SECRET, ARM_TENANT_ID, ARM_SUBSCRIPTION_ID
provider "azurerm" {
  features {}
}

# Databricks provider, pointed at the workspace created by the azurerm provider
provider "databricks" {
  azure_workspace_resource_id = azurerm_databricks_workspace.main.id
  host                        = azurerm_databricks_workspace.main.workspace_url
}
# Remote state stored in the "tfstate" container of the state storage account
terraform {
  backend "azurerm" {
    key                  = "databricks.terraform.tfstate"
    container_name       = "tfstate"
    storage_account_name = "stterraformstate001"
    resource_group_name  = "rg-databricks-prod"
  }
}
# Local provider and module cache
.terraform/

# State files (may contain secrets in plain text)
*.tfstate
*.tfstate.backup

# Variable files contain secrets -- never commit.
# The comment must be on its own line: gitignore has no trailing comments, so
# text after a pattern becomes part of the pattern and the file is NOT ignored.
*.tfvars

# .terraform.lock.hcl is intentionally not ignored: commit it so that every
# machine and CI run installs the same provider versions.
variable "location" {
  description = "Azure region used for the resources in this configuration."
  type        = string
  default     = "westeurope"
}

variable "resource_group" {
  description = "Name of the resource group the resources are deployed into."
  type        = string
  default     = "rg-databricks-prod"
}

variable "environment" {
  description = "Environment label, applied as the Environment tag."
  type        = string
  default     = "prod"
}

variable "workspace_name" {
  description = "Name of the Databricks workspace."
  type        = string
  default     = "dbw-dataplatform-prod"
}

variable "sku" {
  description = "Databricks workspace pricing tier. premium is required for Unity Catalog."
  type        = string
  default     = "premium"

  # Fail at plan time on a typo instead of during the Azure API call
  validation {
    condition     = contains(["standard", "premium", "trial"], var.sku)
    error_message = "sku must be one of: standard, premium, trial."
  }
}
# The Databricks workspace itself
resource "azurerm_databricks_workspace" "main" {
  name                = var.workspace_name
  location            = var.location
  resource_group_name = var.resource_group

  # "premium" is needed for Unity Catalog
  sku = var.sku

  # Separate resource group for the resources that Databricks manages itself
  managed_resource_group_name = "rg-databricks-managed-prod"

  tags = {
    Project     = "DataPlatform"
    ManagedBy   = "Terraform"
    Environment = var.environment
  }
}

# URL of the workspace
output "workspace_url" {
  value = azurerm_databricks_workspace.main.workspace_url
}

# Exposes the workspace resource's `id` attribute
output "workspace_id" {
  value = azurerm_databricks_workspace.main.id
}
# Expose the service principal credentials to Terraform as environment variables
export ARM_CLIENT_ID="<appId>"
export ARM_CLIENT_SECRET="<password>"
export ARM_TENANT_ID="<tenant>"
export ARM_SUBSCRIPTION_ID="<subscriptionId>"

# Standard Terraform workflow
terraform init    # download providers and initialise the backend
terraform plan    # review what is going to be created
terraform apply   # execute (confirm with 'yes')
# ADLS Gen2 storage account that backs the Unity Catalog metastore.
# NOTE(review): storage account names must be globally unique -- adjust "stunity001" if taken.
resource "azurerm_storage_account" "unity" {
  name                     = "stunity001"
  resource_group_name      = var.resource_group
  location                 = var.location
  account_tier             = "Standard"
  account_replication_type = "LRS"
  is_hns_enabled           = true # hierarchical namespace: ADLS Gen2 is required
  min_tls_version          = "TLS1_2"
}

resource "azurerm_storage_container" "unity" {
  name                  = "unity-catalog"
  storage_account_name  = azurerm_storage_account.unity.name
  container_access_type = "private"
}

# Managed identity that Unity Catalog uses to reach the storage account
resource "azurerm_databricks_access_connector" "unity" {
  name                = "dbac-unity-${var.environment}"
  resource_group_name = var.resource_group
  location            = var.location

  identity {
    type = "SystemAssigned"
  }
}

# Give the access connector read/write access to the metastore storage.
# NOTE(review): creating role assignments needs Owner or User Access Administrator
# on this scope; Contributor alone is not sufficient -- confirm the rights of the
# identity that runs Terraform.
resource "azurerm_role_assignment" "unity_storage" {
  scope                = azurerm_storage_account.unity.id
  role_definition_name = "Storage Blob Data Contributor"
  principal_id         = azurerm_databricks_access_connector.unity.identity[0].principal_id
}

resource "databricks_metastore" "main" {
  name = "metastore-prod"

  # Reference the container resource instead of repeating its name, so Terraform
  # creates the container before the metastore that points at it.
  storage_root = format(
    "abfss://%s@%s.dfs.core.windows.net/",
    azurerm_storage_container.unity.name,
    azurerm_storage_account.unity.name,
  )

  region        = var.location
  force_destroy = false
}

resource "databricks_metastore_assignment" "main" {
  metastore_id = databricks_metastore.main.id
  workspace_id = azurerm_databricks_workspace.main.workspace_id
}

# Default storage credential for the metastore root, backed by the access connector
resource "databricks_metastore_data_access" "main" {
  metastore_id = databricks_metastore.main.id
  name         = "unity-access-connector"
  is_default   = true

  azure_managed_identity {
    access_connector_id = azurerm_databricks_access_connector.unity.id
  }

  depends_on = [
    databricks_metastore_assignment.main,
    azurerm_role_assignment.unity_storage,
  ]
}
# Medallion catalogs. Each catalog depends explicitly on the metastore
# assignment: the catalogs do not reference it in any attribute, so without
# depends_on Terraform may try to create them before the workspace has a metastore.
resource "databricks_catalog" "bronze" {
  name       = "bronze"
  comment    = "Raw inkomende data — ongewijzigd"
  depends_on = [databricks_metastore_assignment.main]
}

resource "databricks_catalog" "silver" {
  name       = "silver"
  comment    = "Gecleande en gevalideerde data"
  depends_on = [databricks_metastore_assignment.main]
}

resource "databricks_catalog" "gold" {
  name       = "gold"
  comment    = "Business-ready data voor rapportages"
  depends_on = [databricks_metastore_assignment.main]
}

# One schema per catalog; each is ordered after its catalog via the name reference
resource "databricks_schema" "bronze_raw" {
  catalog_name = databricks_catalog.bronze.name
  name         = "raw"
}

resource "databricks_schema" "silver_clean" {
  catalog_name = databricks_catalog.silver.name
  name         = "clean"
}

resource "databricks_schema" "gold_reporting" {
  catalog_name = databricks_catalog.gold.name
  name         = "reporting"
}
# Cluster policy for job clusters: restricts runtime and node type, and pins
# auto-termination and the data security mode.
resource "databricks_cluster_policy" "jobs" {
  name = "Jobs Cluster Policy"

  definition = jsonencode({
    # Allowed runtimes, with a default
    spark_version = {
      type         = "allowlist"
      values       = ["14.3.x-scala2.12", "15.4.x-scala2.12"]
      defaultValue = "15.4.x-scala2.12"
    }

    # Allowed VM sizes
    node_type_id = {
      type   = "allowlist"
      values = ["Standard_DS3_v2", "Standard_DS4_v2"]
    }

    # Fixed at 30 and hidden
    autotermination_minutes = {
      type   = "fixed"
      value  = 30
      hidden = true
    }

    data_security_mode = {
      type  = "fixed"
      value = "SINGLE_USER"
    }
  })
}
# Group that the membership below refers to. Without this declaration the
# reference databricks_group.data_engineers fails validation as an undeclared resource.
# NOTE(review): remove this block if the group is already declared elsewhere in
# the configuration, and confirm the intended display name.
resource "databricks_group" "data_engineers" {
  display_name = "data-engineers"
}

# Service principal used by the pipelines
resource "databricks_service_principal" "pipeline_sp" {
  application_id = "<aad-app-id>"
  display_name   = "sp-databricks-pipelines"
  active         = true
}

# Add the service principal to the data engineers group
resource "databricks_group_member" "pipeline_sp_admin" {
  group_id  = databricks_group.data_engineers.id
  member_id = databricks_service_principal.pipeline_sp.id
}
Voeg deze secrets toe in GitHub → Settings → Secrets and variables → Actions:
| Secret naam | Waarde |
|---|---|
| ARM_CLIENT_ID | Service Principal appId |
| ARM_CLIENT_SECRET | Service Principal password |
| ARM_TENANT_ID | Azure tenant ID |
| ARM_SUBSCRIPTION_ID | Azure subscription ID |
# Plans on pull requests against main and applies on pushes to main.
name: Terraform Databricks IaC

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# The workflow only needs to read the repository
permissions:
  contents: read

# Run one workflow at a time so runs do not compete for the remote state lock;
# never cancel a run that may be in the middle of an apply.
concurrency:
  group: terraform-state
  cancel-in-progress: false

env:
  ARM_CLIENT_ID: ${{ secrets.ARM_CLIENT_ID }}
  ARM_CLIENT_SECRET: ${{ secrets.ARM_CLIENT_SECRET }}
  ARM_TENANT_ID: ${{ secrets.ARM_TENANT_ID }}
  ARM_SUBSCRIPTION_ID: ${{ secrets.ARM_SUBSCRIPTION_ID }}

jobs:
  terraform:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: 1.6.6

      # -input=false makes Terraform fail instead of waiting for a prompt in CI
      - name: Terraform Init
        run: terraform init -input=false

      # -recursive also checks the files under modules/
      - name: Terraform Format Check
        run: terraform fmt -check -recursive

      - name: Terraform Validate
        run: terraform validate

      - name: Terraform Plan
        if: github.event_name == 'pull_request'
        run: terraform plan -no-color -input=false

      - name: Terraform Apply
        if: github.ref == 'refs/heads/main' && github.event_name == 'push'
        run: terraform apply -auto-approve -input=false