TerraformPilot

Terraform

AWS MSK (Managed Kafka) with Terraform

Deploy AWS MSK (Managed Streaming for Kafka) with Terraform. Cluster configuration, MSK Serverless, encryption, monitoring, and topic management.

Luca Berton · 1 min read

Quick Answer

#
# Minimal three-broker MSK cluster on the small (dev/test) broker size.
resource "aws_msk_cluster" "main" {
  cluster_name           = "my-kafka"
  kafka_version          = "3.6.0"
  number_of_broker_nodes = 3

  broker_node_group_info {
    instance_type   = "kafka.t3.small"
    client_subnets  = aws_subnet.private[*].id
    security_groups = [aws_security_group.msk.id]

    # EBS data volume attached to each broker (size in GiB).
    storage_info {
      ebs_storage_info {
        volume_size = 100
      }
    }
  }
}

Production Cluster

#
# Production MSK cluster: private-only brokers, encryption in transit and at
# rest, IAM + SCRAM + mutual-TLS client authentication, a custom broker
# configuration, Prometheus exporters, and broker logs shipped to CloudWatch
# Logs and S3.
resource "aws_msk_cluster" "main" {
  cluster_name  = "${var.project}-kafka"
  kafka_version = "3.6.0"

  # Must be a multiple of the number of client subnets listed below.
  number_of_broker_nodes = 6

  broker_node_group_info {
    instance_type   = "kafka.m5.large"
    client_subnets  = aws_subnet.private[*].id
    security_groups = [aws_security_group.msk.id]

    storage_info {
      ebs_storage_info {
        volume_size = 500 # GiB per broker

        # Provisioned storage throughput is only supported on
        # kafka.m5.4xlarge and larger brokers; enabling it on kafka.m5.large
        # makes the API reject the cluster. To use it, raise instance_type
        # and uncomment:
        #
        # provisioned_throughput {
        #   enabled           = true
        #   volume_throughput = 250 # MiB/s
        # }
      }
    }

    # Brokers are reachable only from inside the VPC.
    connectivity_info {
      public_access {
        type = "DISABLED"
      }
    }
  }

  encryption_info {
    encryption_in_transit {
      client_broker = "TLS" # reject plaintext client connections
      in_cluster    = true  # encrypt broker-to-broker traffic
    }
    encryption_at_rest_kms_key_arn = aws_kms_key.msk.arn
  }

  client_authentication {
    sasl {
      iam   = true
      scram = true
    }
    tls {
      certificate_authority_arns = [aws_acmpca_certificate_authority.msk.arn]
    }
  }

  # Pin the cluster to the latest revision of the custom broker configuration.
  configuration_info {
    arn      = aws_msk_configuration.main.arn
    revision = aws_msk_configuration.main.latest_revision
  }

  open_monitoring {
    prometheus {
      jmx_exporter {
        enabled_in_broker = true
      }
      node_exporter {
        enabled_in_broker = true
      }
    }
  }

  logging_info {
    broker_logs {
      cloudwatch_logs {
        enabled   = true
        log_group = aws_cloudwatch_log_group.msk.name
      }
      s3_logs {
        enabled = true
        bucket  = aws_s3_bucket.msk_logs.id
        prefix  = "kafka-logs"
      }
    }
  }

  tags = {
    Environment = var.environment
  }

  timeouts {
    create = "60m"
    update = "120m"
    delete = "60m"
  }
}

Cluster Configuration

#
# Custom broker configuration for Kafka 3.6.0 clusters. The heredoc is passed
# to MSK verbatim as the broker server.properties overrides.
resource "aws_msk_configuration" "main" {
  name           = "${var.project}-kafka-config"
  kafka_versions = ["3.6.0"]

  # Topics must be created explicitly; new topics default to 6 partitions,
  # replication factor 3, and at least 2 in-sync replicas.
  server_properties = <<-PROPERTIES
    auto.create.topics.enable=false
    default.replication.factor=3
    min.insync.replicas=2
    num.partitions=6
    num.io.threads=8
    num.network.threads=5
    log.retention.hours=168
    log.segment.bytes=1073741824
    message.max.bytes=10485760
    compression.type=lz4
  PROPERTIES
}

MSK Serverless

#
# MSK Serverless cluster placed in the private subnets, with IAM as the
# client authentication mechanism.
resource "aws_msk_serverless_cluster" "main" {
  cluster_name = "${var.project}-kafka-serverless"

  client_authentication {
    sasl {
      iam {
        enabled = true
      }
    }
  }

  vpc_config {
    security_group_ids = [aws_security_group.msk.id]
    subnet_ids         = aws_subnet.private[*].id
  }
}

Security Group

#
# Security group for the MSK brokers. Application clients (members of
# aws_security_group.app) get one ingress rule per authentication listener:
# 9094 for TLS, 9096 for SASL/SCRAM, and 9098 for IAM.
resource "aws_security_group" "msk" {
  name   = "${var.project}-msk"
  vpc_id = aws_vpc.main.id

  ingress {
    description     = "Kafka TLS"
    from_port       = 9094
    to_port         = 9094
    protocol        = "tcp"
    security_groups = [aws_security_group.app.id]
  }

  # SASL/SCRAM clients connect on 9096; without this rule, SCRAM users
  # cannot reach the brokers from inside the VPC.
  ingress {
    description     = "Kafka SASL/SCRAM"
    from_port       = 9096
    to_port         = 9096
    protocol        = "tcp"
    security_groups = [aws_security_group.app.id]
  }

  ingress {
    description     = "Kafka IAM"
    from_port       = 9098
    to_port         = 9098
    protocol        = "tcp"
    security_groups = [aws_security_group.app.id]
  }

  # ZooKeeper is reachable only from members of this same security group.
  ingress {
    description = "ZooKeeper"
    from_port   = 2181
    to_port     = 2181
    protocol    = "tcp"
    self        = true
  }

  # Allow all outbound traffic.
  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}

SCRAM Authentication

#
# Secret holding the SCRAM credentials. The name carries the "AmazonMSK_"
# prefix and the secret is encrypted with the customer-managed MSK KMS key.
resource "aws_secretsmanager_secret" "msk_user" {
  kms_key_id = aws_kms_key.msk.arn
  name       = "AmazonMSK_${var.project}_user"
}
 
# Stores the SCRAM username and password as a JSON document in the secret.
resource "aws_secretsmanager_secret_version" "msk_user" {
  secret_id     = aws_secretsmanager_secret.msk_user.id
  secret_string = jsonencode({ username = "app-user", password = var.msk_password })
}
 
# Associates the SCRAM credentials secret with the MSK cluster.
resource "aws_msk_scram_secret_association" "main" {
  cluster_arn     = aws_msk_cluster.main.arn
  secret_arn_list = [aws_secretsmanager_secret.msk_user.arn]

  # The secret ARN exists as soon as the secret is created, before it has a
  # value. Wait for the version so MSK never associates an empty secret.
  depends_on = [aws_secretsmanager_secret_version.msk_user]
}

Outputs

#
output "bootstrap_brokers_tls" {
  description = "Bootstrap broker connection string for TLS clients."
  value       = aws_msk_cluster.main.bootstrap_brokers_tls
}
 
output "bootstrap_brokers_iam" {
  description = "Bootstrap broker connection string for SASL/IAM clients."
  value       = aws_msk_cluster.main.bootstrap_brokers_sasl_iam
}
 
output "zookeeper_connect" {
  description = "ZooKeeper connection string exposed by the MSK cluster."
  value       = aws_msk_cluster.main.zookeeper_connect_string
}

Instance Types

#
| Type | Use Case | Partitions/Broker |
| --- | --- | --- |
| kafka.t3.small | Dev/test | Up to 300 |
| kafka.m5.large | Production | Up to 1,000 |
| kafka.m5.2xlarge | High throughput | Up to 2,000 |
#

Conclusion

#

Use MSK Serverless for variable workloads and provisioned clusters for predictable traffic. Set min.insync.replicas=2 with default.replication.factor=3 for durability. Prefer IAM authentication over SCRAM when possible, enable Prometheus monitoring, and ship broker logs to CloudWatch and S3. MSK clusters take 15-30 minutes to create.

#Terraform#AWS#MSK#Kafka#Infrastructure as Code

Share this article