TerraformPilot

Terraform

AWS Kinesis Data Streams with Terraform

Deploy AWS Kinesis Data Streams with Terraform. Stream configuration, shard management, Lambda consumers, Firehose delivery, and encryption settings.

Luca Berton · 1 min read

Quick Answer

#
# Minimal provisioned Kinesis stream: one shard (1 MB/s write, 2 MB/s read)
# with the default 24-hour retention window.
resource "aws_kinesis_stream" "events" {
  name             = "events-stream"
  shard_count      = 1   # Capacity is fixed; resharding must be managed explicitly.
  retention_period = 24  # Hours; 24 is the minimum/default.
 
  stream_mode_details {
    stream_mode = "PROVISIONED"  # Billed per shard-hour.
  }
}
#
# On-demand Kinesis stream: capacity auto-scales, so no shard_count is set
# (shard_count is only meaningful in PROVISIONED mode).
resource "aws_kinesis_stream" "events" {
  name             = "${var.project}-events"
  retention_period = 72  # Hours (24-8760); 72 gives a 3-day replay window.
 
  stream_mode_details {
    stream_mode = "ON_DEMAND"  # Auto-scales, no shard management
  }
 
  # Server-side encryption with the AWS-managed Kinesis key (no CMK to manage).
  encryption_type = "KMS"
  kms_key_id      = "alias/aws/kinesis"
 
  tags = { Environment = var.environment }
}

Provisioned Stream with Scaling

#
# Provisioned stream sized for higher throughput, with per-shard CloudWatch
# metrics enabled for capacity monitoring.
resource "aws_kinesis_stream" "high_throughput" {
  name             = "${var.project}-high-throughput"
  shard_count      = 4  # 4 MB/s write, 8 MB/s read
  retention_period = 168  # 7 days
 
  stream_mode_details {
    stream_mode = "PROVISIONED"
  }
 
  # Customer-managed CMK (aws_kms_key.kinesis is defined elsewhere in the
  # configuration — not shown here).
  encryption_type = "KMS"
  kms_key_id      = aws_kms_key.kinesis.arn
 
  # Shard-level (enhanced) metrics; note these incur additional CloudWatch
  # charges per shard per metric.
  shard_level_metrics = [
    "IncomingBytes",
    "OutgoingBytes",
    "IncomingRecords",
    "OutgoingRecords",
    "WriteProvisionedThroughputExceeded",  # Signals producers being throttled.
    "ReadProvisionedThroughputExceeded",   # Signals consumers being throttled.
    "IteratorAgeMilliseconds",             # Signals consumers falling behind.
  ]
 
  tags = { Environment = var.environment }
}

Lambda Consumer

#
# Wires the Kinesis stream to a Lambda consumer with poison-record isolation
# and a dead-letter destination for records that exhaust retries.
resource "aws_lambda_event_source_mapping" "kinesis" {
  event_source_arn  = aws_kinesis_stream.events.arn
  function_name     = aws_lambda_function.processor.arn
  starting_position = "LATEST"  # New records only; use TRIM_HORIZON to replay backlog.
  batch_size        = 100
 
  maximum_batching_window_in_seconds = 5     # Wait up to 5s to fill a batch.
  maximum_retry_attempts             = 3     # Then hand off to on_failure below.
  bisect_batch_on_function_error     = true  # Split failing batches to isolate bad records.
  parallelization_factor             = 2     # Up to 2 concurrent invocations per shard.
 
  destination_config {
    on_failure {
      # NOTE(review): for Kinesis sources the failure destination receives
      # invocation/record metadata (shard, sequence numbers), not the record
      # payload itself — retrieve the data from the stream within retention.
      destination_arn = aws_sqs_queue.dlq.arn
    }
  }
 
  # Lets the function report per-record failures instead of failing the whole
  # batch (the handler must return batchItemFailures).
  function_response_types = ["ReportBatchItemFailures"]
}
 
# Read-side permissions the Lambda execution role needs to consume the stream.
resource "aws_iam_role_policy" "lambda_kinesis" {
  name = "kinesis-access"
  role = aws_iam_role.lambda.id
 
  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Effect = "Allow"
      Action = [
        "kinesis:GetRecords",
        "kinesis:GetShardIterator",
        "kinesis:DescribeStream",
        "kinesis:ListShards",
        # NOTE(review): SubscribeToShard is only used by enhanced fan-out
        # consumers, and for those it is authorized against the registered
        # consumer ARN rather than the stream ARN — verify if fan-out is used.
        "kinesis:SubscribeToShard",
      ]
      Resource = aws_kinesis_stream.events.arn
    }]
  })
}

Kinesis Data Firehose (S3 Delivery)

#
# Firehose delivery stream that drains the Kinesis stream into S3 with
# Hive-style date partitioning and GZIP compression.
resource "aws_kinesis_firehose_delivery_stream" "s3" {
  name        = "${var.project}-to-s3"
  destination = "extended_s3"
 
  # Read from the existing Kinesis stream rather than Direct PUT.
  kinesis_source_configuration {
    kinesis_stream_arn = aws_kinesis_stream.events.arn
    role_arn           = aws_iam_role.firehose.arn
  }
 
  extended_s3_configuration {
    role_arn   = aws_iam_role.firehose.arn
    bucket_arn = aws_s3_bucket.data_lake.arn
    # Hive-style year=/month=/day= partitioning for Athena/Glue compatibility.
    prefix     = "events/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/"
 
    # Fix: without error_output_prefix, records Firehose fails to deliver are
    # written to the bucket root, mixed outside the partitioned layout. The
    # !{firehose:error-output-type} namespace is required in this prefix.
    error_output_prefix = "errors/!{firehose:error-output-type}/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/"
 
    # Flush whichever limit is hit first.
    buffering_size     = 64   # MB
    buffering_interval = 300  # Seconds
 
    compression_format = "GZIP"
 
    # Delivery-failure diagnostics land in this log group/stream.
    cloudwatch_logging_options {
      enabled         = true
      log_group_name  = aws_cloudwatch_log_group.firehose.name
      log_stream_name = "S3Delivery"
    }
  }
}

Producer IAM Policy

#
# Least-privilege write-side policy for producers: only the two Put APIs,
# scoped to the single stream. Attach to producer roles/users elsewhere.
resource "aws_iam_policy" "kinesis_producer" {
  name = "${var.project}-kinesis-producer"
 
  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Effect = "Allow"
      Action = [
        "kinesis:PutRecord",   # Single-record writes.
        "kinesis:PutRecords",  # Batched writes (up to 500 records/call).
      ]
      Resource = aws_kinesis_stream.events.arn
    }]
  })
}

Provisioned vs On-Demand

#
| Feature  | Provisioned            | On-Demand                    |
|----------|------------------------|------------------------------|
| Capacity | Fixed shard count      | Auto-scales                  |
| Write    | 1 MB/s per shard       | Up to 200 MB/s               |
| Read     | 2 MB/s per shard       | Up to 400 MB/s               |
| Cost     | Per shard-hour         | Per GB + per million records |
| Best for | Predictable traffic    | Variable/unpredictable       |
#

Conclusion

#

Use on-demand mode for variable workloads (no shard management). Use provisioned mode with shard-level metrics when you need cost predictability. Always enable encryption, configure Lambda consumers with bisect_batch_on_function_error and dead-letter queues, and use Firehose for S3/data lake delivery.

#Terraform#AWS#Kinesis#Streaming#Infrastructure as Code

Share this article