Skip to content
Vladimir Chavkov
Go back

Multi-Cloud Infrastructure with Terraform: AWS, Azure, and GCP

Edit page

Multi-Cloud Infrastructure with Terraform: AWS, Azure, and GCP

Organizations increasingly adopt multi-cloud strategies for redundancy, cost optimization, and avoiding vendor lock-in. Terraform’s provider-agnostic approach makes it ideal for managing infrastructure across AWS, Azure, and GCP from a single codebase. This guide covers patterns, challenges, and best practices for multi-cloud Terraform deployments.

Why Multi-Cloud?

Strategic Benefits

  1. Avoid Vendor Lock-In: Reduce dependency on single provider
  2. Geographic Coverage: Leverage best regions from each provider
  3. Cost Optimization: Use most cost-effective services per workload
  4. Disaster Recovery: Cross-cloud redundancy
  5. Best-of-Breed Services: Use specialized services from each cloud

Challenges

Multi-cloud also carries real costs: divergent provider APIs and authentication models, duplicated modules per cloud, cross-cloud networking complexity, fragmented state and tooling, and the operational expertise required to run three platforms well.

Multi-Cloud Terraform Patterns

1. Multiple Provider Configuration

providers.tf
# providers.tf — version pins and remote state shared by all three clouds.
terraform {
  required_version = ">= 1.7.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~> 3.0"
    }
    google = {
      source  = "hashicorp/google"
      version = "~> 5.0"
    }
  }

  # One S3 backend holds state for every cloud; DynamoDB provides locking.
  backend "s3" {
    bucket         = "terraform-state-multicloud"
    key            = "infrastructure/terraform.tfstate"
    region         = "us-east-1"
    encrypt        = true
    dynamodb_table = "terraform-state-lock"
  }
}
# AWS provider: every resource inherits the shared tag set via default_tags.
provider "aws" {
  region = var.aws_region

  default_tags {
    tags = merge(
      var.common_tags,
      {
        Cloud     = "AWS"
        ManagedBy = "Terraform"
      }
    )
  }
}

# Azure provider: refuse to delete resource groups that still hold resources.
provider "azurerm" {
  features {
    resource_group {
      prevent_deletion_if_contains_resources = true
    }
  }

  subscription_id = var.azure_subscription_id
}

# GCP provider: default_labels play the role of tags; note the lowercase keys.
provider "google" {
  project = var.gcp_project_id
  region  = var.gcp_region

  default_labels = merge(
    var.common_tags,
    {
      cloud      = "gcp"
      managed_by = "terraform"
    }
  )
}

2. Cloud-Agnostic Module Structure

terraform/
├── modules/
│ ├── compute/
│ │ ├── aws/
│ │ │ ├── main.tf
│ │ │ ├── variables.tf
│ │ │ └── outputs.tf
│ │ ├── azure/
│ │ │ ├── main.tf
│ │ │ ├── variables.tf
│ │ │ └── outputs.tf
│ │ └── gcp/
│ │ ├── main.tf
│ │ ├── variables.tf
│ │ └── outputs.tf
│ ├── networking/
│ │ ├── aws/
│ │ ├── azure/
│ │ └── gcp/
│ └── storage/
│ ├── aws/
│ ├── azure/
│ └── gcp/
├── environments/
│ ├── production/
│ │ ├── aws.tf
│ │ ├── azure.tf
│ │ ├── gcp.tf
│ │ └── variables.tf
│ └── staging/
└── global/
└── dns/

3. Abstraction Layer Pattern

Create cloud-agnostic interfaces:

modules/compute/interface/variables.tf
# Cloud-agnostic input contract shared by every compute implementation.
variable "instance_name" {
  type        = string
  description = "Name of the compute instance"
}

variable "instance_type" {
  type        = string
  description = "Instance size (small, medium, large)"
}

variable "cloud_provider" {
  type        = string
  description = "Cloud provider (aws, azure, gcp)"

  # Fail fast at plan time if an unsupported provider is requested.
  validation {
    condition     = contains(["aws", "azure", "gcp"], var.cloud_provider)
    error_message = "Provider must be aws, azure, or gcp."
  }
}
# modules/compute/interface/main.tf
locals {
  # Translate the generic t-shirt sizes into each provider's native SKU.
  instance_types = {
    aws = {
      small  = "t3.small"
      medium = "t3.medium"
      large  = "t3.large"
    }
    azure = {
      small  = "Standard_B1s"
      medium = "Standard_B2s"
      large  = "Standard_B4ms"
    }
    gcp = {
      small  = "e2-small"
      medium = "e2-medium"
      large  = "e2-standard-2"
    }
  }
}
# Exactly one of the three implementations is instantiated: count evaluates
# to 1 for the selected provider and 0 for the other two.
module "aws_instance" {
  count  = var.cloud_provider == "aws" ? 1 : 0
  source = "../aws"

  instance_name = var.instance_name
  instance_type = local.instance_types.aws[var.instance_type]
  # ... other AWS-specific parameters
}

module "azure_instance" {
  count  = var.cloud_provider == "azure" ? 1 : 0
  source = "../azure"

  instance_name = var.instance_name
  vm_size       = local.instance_types.azure[var.instance_type]
  # ... other Azure-specific parameters
}

module "gcp_instance" {
  count  = var.cloud_provider == "gcp" ? 1 : 0
  source = "../gcp"

  instance_name = var.instance_name
  machine_type  = local.instance_types.gcp[var.instance_type]
  # ... other GCP-specific parameters
}

Real-World Multi-Cloud Scenarios

Scenario 1: Multi-Cloud Web Application

# Deploy the same web application stack to AWS and Azure with identical
# settings, so either cloud can serve production traffic.
module "webapp_aws" {
  source = "./modules/webapp/aws"

  app_name       = "myapp"
  environment    = "production"
  region         = "us-east-1"
  instance_count = 3
  enable_cdn     = true
  enable_waf     = true
  backup_enabled = true
}

module "webapp_azure" {
  source = "./modules/webapp/azure"

  app_name       = "myapp"
  environment    = "production"
  region         = "eastus"
  instance_count = 3
  enable_cdn     = true
  enable_waf     = true
  backup_enabled = true
}
# Global DNS failover: Route 53 serves the AWS endpoint while its health
# check passes, and fails over to the Azure deployment otherwise.
resource "aws_route53_record" "webapp_primary" {
  zone_id         = aws_route53_zone.main.zone_id
  name            = "app.company.com"
  type            = "A"
  set_identifier  = "aws-primary"
  health_check_id = aws_route53_health_check.aws.id

  failover_routing_policy {
    type = "PRIMARY"
  }

  # An alias block is valid here because the target is an AWS load balancer.
  alias {
    name                   = module.webapp_aws.load_balancer_dns
    zone_id                = module.webapp_aws.load_balancer_zone_id
    evaluate_target_health = true
  }
}

# Route 53 alias records can only target AWS resources, so the Azure
# endpoint must be a plain (non-alias) record. Records in a failover pair
# must share the same name and type, hence an A record with the Azure
# load balancer's static public IP.
# NOTE(review): assumes the Azure webapp module exposes a static public IP
# output (load_balancer_public_ip) — confirm against the module's outputs.
resource "aws_route53_record" "webapp_secondary" {
  zone_id        = aws_route53_zone.main.zone_id
  name           = "app.company.com"
  type           = "A"
  ttl            = 60
  set_identifier = "azure-secondary"

  failover_routing_policy {
    type = "SECONDARY"
  }

  records = [module.webapp_azure.load_balancer_public_ip]
}

Scenario 2: Data Processing Pipeline

# Pick the strongest service from each cloud for the pipeline.

# AWS S3 for the data lake (lowest cost).
resource "aws_s3_bucket" "data_lake" {
  bucket = "company-data-lake"
}

# GCP BigQuery for analytics (best query performance).
resource "google_bigquery_dataset" "analytics" {
  dataset_id  = "analytics"
  location    = "US"
  description = "Analytics dataset"
}

resource "google_bigquery_table" "events" {
  dataset_id = google_bigquery_dataset.analytics.dataset_id
  table_id   = "events"

  # External table over Parquet files in GCS.
  # NOTE(review): assumes a separate replication job keeps the
  # "company-gcs-bridge" bucket in sync with the S3 data lake — BigQuery
  # cannot read S3 directly via gs:// URIs; confirm the sync mechanism.
  external_data_configuration {
    source_uris = [
      "gs://company-gcs-bridge/${aws_s3_bucket.data_lake.bucket}/*"
    ]
    source_format = "PARQUET"
    autodetect    = true
  }
}

# Azure Machine Learning for ML workloads.
resource "azurerm_machine_learning_workspace" "ml" {
  name                    = "company-ml-workspace"
  location                = azurerm_resource_group.ml.location
  resource_group_name     = azurerm_resource_group.ml.name
  application_insights_id = azurerm_application_insights.ml.id
  key_vault_id            = azurerm_key_vault.ml.id
  storage_account_id      = azurerm_storage_account.ml.id
}

Scenario 3: Kubernetes Multi-Cloud

# One managed Kubernetes cluster per cloud, all pinned to the same version.

# AWS EKS
module "eks_cluster" {
  source = "./modules/kubernetes/aws"

  cluster_name    = "production-eks"
  cluster_version = "1.28"
  region          = "us-east-1"

  node_groups = {
    general = {
      instance_types = ["t3.medium"]
      min_size       = 3
      max_size       = 10
      desired_size   = 5
    }
  }
}

# Azure AKS
module "aks_cluster" {
  source = "./modules/kubernetes/azure"

  cluster_name       = "production-aks"
  kubernetes_version = "1.28"
  location           = "eastus"

  default_node_pool = {
    name       = "general"
    vm_size    = "Standard_D2s_v3"
    node_count = 5
  }
}

# GCP GKE
module "gke_cluster" {
  source = "./modules/kubernetes/gcp"

  cluster_name       = "production-gke"
  location           = "us-central1"
  kubernetes_version = "1.28"

  node_pools = {
    general = {
      machine_type = "e2-medium"
      min_count    = 3
      max_count    = 10
    }
  }
}
# Multi-cluster service mesh with Istio
# Installs the Istio "base" chart into each cluster, stamping every
# installation with a shared mesh ID plus its own cluster name so the three
# clusters can be joined into one logical mesh.
# NOTE(review): for_each alone cannot point helm at three different
# clusters — each release needs a helm provider alias bound to the matching
# endpoint; confirm the provider wiring elsewhere in the repo.
resource "helm_release" "istio_multicluster" {
for_each = {
eks = module.eks_cluster.cluster_endpoint
aks = module.aks_cluster.cluster_endpoint
gke = module.gke_cluster.cluster_endpoint
}
name = "istio-base"
repository = "https://istio-release.storage.googleapis.com/charts"
chart = "base"
namespace = "istio-system"
# Shared mesh ID common to all clusters.
set {
name = "global.meshID"
value = "company-mesh"
}
# Per-cluster identity within the mesh ("eks", "aks", or "gke").
set {
name = "global.multiCluster.clusterName"
value = each.key
}
}

Cross-Cloud Networking

1. VPN Connections

# AWS side of the site-to-site IPsec tunnel.
resource "aws_vpn_gateway" "main" {
  vpc_id = aws_vpc.main.id

  tags = {
    Name = "aws-to-azure-vpn"
  }
}

# Azure's VPN gateway is modeled on the AWS side as a "customer gateway".
resource "aws_customer_gateway" "azure" {
  bgp_asn    = 65515
  ip_address = azurerm_public_ip.vpn_gateway.ip_address
  type       = "ipsec.1"

  tags = {
    Name = "Azure VPN Gateway"
  }
}

resource "aws_vpn_connection" "aws_to_azure" {
  vpn_gateway_id      = aws_vpn_gateway.main.id
  customer_gateway_id = aws_customer_gateway.azure.id
  type                = "ipsec.1"
  # Dynamic (BGP) routing rather than static routes.
  static_routes_only = false
}
# Azure side of the tunnel.
resource "azurerm_virtual_network_gateway" "vpn" {
  name                = "azure-vpn-gateway"
  location            = azurerm_resource_group.network.location
  resource_group_name = azurerm_resource_group.network.name

  type     = "Vpn"
  vpn_type = "RouteBased"
  sku      = "VpnGw1"

  ip_configuration {
    name                          = "vnetGatewayConfig"
    public_ip_address_id          = azurerm_public_ip.vpn_gateway.id
    private_ip_address_allocation = "Dynamic"
    subnet_id                     = azurerm_subnet.gateway.id
  }
}

# AWS is modeled on the Azure side as a "local network gateway"; its address
# comes from the AWS VPN connection's first tunnel.
resource "azurerm_local_network_gateway" "aws" {
  name                = "aws-local-gateway"
  location            = azurerm_resource_group.network.location
  resource_group_name = azurerm_resource_group.network.name

  gateway_address = aws_vpn_connection.aws_to_azure.tunnel1_address
  address_space   = [aws_vpc.main.cidr_block]
}

resource "azurerm_virtual_network_gateway_connection" "azure_to_aws" {
  name                = "azure-to-aws"
  location            = azurerm_resource_group.network.location
  resource_group_name = azurerm_resource_group.network.name

  type                       = "IPsec"
  virtual_network_gateway_id = azurerm_virtual_network_gateway.vpn.id
  local_network_gateway_id   = azurerm_local_network_gateway.aws.id
  # Both ends must share the pre-shared key negotiated for tunnel 1.
  shared_key = aws_vpn_connection.aws_to_azure.tunnel1_preshared_key
}

2. Cloud Interconnect

# Dedicated (non-VPN) links for high-bandwidth cross-cloud traffic.

# GCP partner interconnect attachment toward AWS.
resource "google_compute_interconnect_attachment" "aws" {
  name                     = "aws-interconnect"
  region                   = "us-central1"
  type                     = "PARTNER"
  edge_availability_domain = "AVAILABILITY_DOMAIN_1"
  router                   = google_compute_router.interconnect.id
  bandwidth                = "BPS_1G"
}

# AWS Direct Connect toward GCP.
resource "aws_dx_connection" "gcp" {
  name      = "gcp-direct-connect"
  bandwidth = "1Gbps"
  location  = "EqDC2" # Equinix DC2

  tags = {
    Purpose = "GCP Interconnect"
  }
}

State Management for Multi-Cloud

1. Centralized State Storage

# Option A: a single S3 backend holds state for every cloud,
# with DynamoDB providing state locking.
terraform {
  backend "s3" {
    bucket         = "terraform-multicloud-state"
    key            = "global/terraform.tfstate"
    region         = "us-east-1"
    encrypt        = true
    dynamodb_table = "terraform-state-lock"
  }
}

# Option B: Terraform Cloud. A configuration may declare only one backend,
# so use either the block above or this one — never both at once.
terraform {
  cloud {
    organization = "company-name"

    workspaces {
      name = "multicloud-production"
    }
  }
}

2. Workspace Strategy

Terminal window
# Separate workspaces per cloud/environment
terraform workspace new aws-production
terraform workspace new azure-production
terraform workspace new gcp-production

# Use workspace in configuration
locals {
  workspace_config = {
    aws-production = {
      region         = "us-east-1"
      instance_count = 5
    }
    azure-production = {
      region         = "eastus"
      instance_count = 3
    }
    # Every workspace created above needs an entry here; a missing key makes
    # the lookup below fail with an "invalid index" error in that workspace.
    gcp-production = {
      region         = "us-central1"
      instance_count = 3
    }
  }

  # Settings for whichever workspace is currently selected.
  config = local.workspace_config[terraform.workspace]
}

Cost Optimization

1. Cross-Cloud Cost Comparison Module

modules/cost-analyzer/main.tf
# modules/cost-analyzer/main.tf — rough monthly compute cost estimates.
locals {
  # instances x 30 days x 24 hours x on-demand hourly rate.
  aws_compute_cost   = var.aws_instance_count * 30 * 24 * 0.0464 # t3.medium
  azure_compute_cost = var.azure_vm_count * 30 * 24 * 0.0496     # Standard_B2s
  gcp_compute_cost   = var.gcp_instance_count * 30 * 24 * 0.0475 # e2-medium

  # HCL does not allow an expression to continue across newlines outside
  # brackets, so the multi-line sum must be parenthesized.
  total_monthly_cost = (
    local.aws_compute_cost +
    local.azure_compute_cost +
    local.gcp_compute_cost
  )
}

output "cost_breakdown" {
  value = {
    aws_monthly   = local.aws_compute_cost
    azure_monthly = local.azure_compute_cost
    gcp_monthly   = local.gcp_compute_cost
    total_monthly = local.total_monthly_cost
  }
}

2. Spot/Preemptible Instance Strategy

# Run batch workloads on discounted interruptible capacity in each cloud.
module "batch_processing_aws" {
  source = "./modules/batch/aws"

  use_spot_instances = true
  spot_max_price     = "0.05" # 50% of on-demand
}

module "batch_processing_gcp" {
  source = "./modules/batch/gcp"

  use_preemptible_instances = true
  # Preemptible is ~80% cheaper
}

module "batch_processing_azure" {
  source = "./modules/batch/azure"

  use_spot_instances = true
  spot_max_price     = -1 # Azure spot pricing
}

Security and Compliance

1. Unified Secret Management

# Secrets live in HashiCorp Vault (cloud-agnostic), so one credential pair
# feeds the database modules in every cloud.
data "vault_generic_secret" "database" {
  path = "secret/database/production"
}

module "aws_database" {
  source = "./modules/database/aws"

  master_username = data.vault_generic_secret.database.data["username"]
  master_password = data.vault_generic_secret.database.data["password"]
}

module "azure_database" {
  source = "./modules/database/azure"

  administrator_login    = data.vault_generic_secret.database.data["username"]
  administrator_password = data.vault_generic_secret.database.data["password"]
}

2. Consistent Security Policies

# One security baseline object, passed unchanged to each cloud's
# security module so policy stays uniform across providers.
locals {
  security_baseline = {
    encryption_at_rest    = true
    encryption_in_transit = true
    backup_enabled        = true
    multi_az              = true
    monitoring_enabled    = true

    allowed_ingress_cidrs = [
      "10.0.0.0/8", # Internal networks
      "172.16.0.0/12",
    ]
  }
}

module "aws_security" {
  source   = "./modules/security/aws"
  baseline = local.security_baseline
}

module "azure_security" {
  source   = "./modules/security/azure"
  baseline = local.security_baseline
}

module "gcp_security" {
  source   = "./modules/security/gcp"
  baseline = local.security_baseline
}

Monitoring and Observability

1. Unified Monitoring with Prometheus

# Per-cluster Prometheus that labels its metrics with the cloud of origin
# and remote-writes everything to a central Prometheus endpoint.
resource "kubernetes_namespace" "monitoring" {
  metadata {
    name = "monitoring"
  }
}

resource "helm_release" "prometheus" {
  name       = "prometheus"
  repository = "https://prometheus-community.github.io/helm-charts"
  chart      = "kube-prometheus-stack"
  namespace  = kubernetes_namespace.monitoring.metadata[0].name

  # External label identifying which cloud these metrics came from.
  set {
    name  = "prometheus.prometheusSpec.externalLabels.cloud"
    value = var.cloud_provider
  }

  # Aggregate all clusters into the central Prometheus via remote write.
  set {
    name  = "prometheus.prometheusSpec.remoteWrite[0].url"
    value = "https://prometheus.company.com/api/v1/write"
  }
}

2. Centralized Logging

# Ship logs to centralized ELK stack
# One Filebeat release per cluster; each.key ("aws"/"azure"/"gcp") is
# templated into the rendered filebeat.yml as the cloud identifier.
# NOTE(review): for_each does not switch the helm provider between
# clusters — each release needs a provider alias bound to the matching
# cluster endpoint; confirm the helm provider configuration in the repo.
resource "helm_release" "filebeat" {
for_each = {
aws = module.eks_cluster.cluster_endpoint
azure = module.aks_cluster.cluster_endpoint
gcp = module.gke_cluster.cluster_endpoint
}
name = "filebeat"
repository = "https://helm.elastic.co"
chart = "filebeat"
namespace = "logging"
set {
# The "\\." escapes the dot so Helm treats "filebeat.yml" as one literal
# key instead of a nested path.
name = "filebeatConfig.filebeat\\.yml"
value = templatefile("${path.module}/filebeat.yml", {
elasticsearch_host = "elasticsearch.company.com"
cloud_provider = each.key
})
}
}

Testing Multi-Cloud Infrastructure

1. Terratest Example

package test
import (
"testing"
"github.com/gruntwork-io/terratest/modules/terraform"
"github.com/stretchr/testify/assert"
)
func TestMultiCloudVPC(t *testing.T) {
t.Parallel()
// Test AWS VPC
awsOptions := &terraform.Options{
TerraformDir: "../environments/test/aws",
}
defer terraform.Destroy(t, awsOptions)
terraform.InitAndApply(t, awsOptions)
awsVpcId := terraform.Output(t, awsOptions, "vpc_id")
assert.NotEmpty(t, awsVpcId)
// Test Azure VNet
azureOptions := &terraform.Options{
TerraformDir: "../environments/test/azure",
}
defer terraform.Destroy(t, azureOptions)
terraform.InitAndApply(t, azureOptions)
azureVnetId := terraform.Output(t, azureOptions, "vnet_id")
assert.NotEmpty(t, azureVnetId)
// Test GCP VPC
gcpOptions := &terraform.Options{
TerraformDir: "../environments/test/gcp",
}
defer terraform.Destroy(t, gcpOptions)
terraform.InitAndApply(t, gcpOptions)
gcpVpcName := terraform.Output(t, gcpOptions, "vpc_name")
assert.NotEmpty(t, gcpVpcName)
}

Production Checklist

Before going live, verify: remote state with locking, pinned provider versions, per-cloud credentials stored in a secrets manager, tested cross-cloud network routes, unified monitoring and alerting, cost budgets per cloud, and a rehearsed DNS failover path.

Conclusion

Multi-cloud Terraform requires careful planning and consistent patterns. Focus on creating cloud-agnostic abstractions while leveraging unique strengths of each provider. Start with a hybrid approach—use multiple clouds strategically rather than replicating everything everywhere.

Remember: multi-cloud adds complexity. Ensure the benefits (redundancy, cost optimization, specialized services) outweigh the operational overhead for your organization.


Need help with multi-cloud strategy? Our Terraform training covers advanced multi-cloud patterns, provider integration, and enterprise infrastructure management. Explore Terraform training or contact us for multi-cloud architecture consulting.


Edit page
Share this post on:

Previous Post
Service Mesh with Istio: Production Implementation and Best Practices
Next Post
Production Observability with Prometheus and Grafana: Complete Guide