From 2c18d7e0875b3d171e3e21476d82618777cdc310 Mon Sep 17 00:00:00 2001 From: Tommy Dang Date: Tue, 14 May 2024 01:28:13 -0700 Subject: [PATCH] 3 --- .env.dev | 4 +- .gitignore | 4 + Dockerfile | 2 +- mlops/metadata.yaml | 12 ++ mlops/unit_5_deploying/aws/alb.tf | 82 ++++++++ mlops/unit_5_deploying/aws/db.tf | 83 ++++++++ mlops/unit_5_deploying/aws/efs.tf | 36 ++++ mlops/unit_5_deploying/aws/env_vars.json | 34 ++++ mlops/unit_5_deploying/aws/iam.tf | 73 +++++++ mlops/unit_5_deploying/aws/lambda.tf | 23 +++ mlops/unit_5_deploying/aws/main.tf | 190 ++++++++++++++++++ mlops/unit_5_deploying/aws/networking.tf | 56 ++++++ .../aws/python/event_handler.py | 26 +++ .../aws/python/event_handler.zip | Bin 0 -> 637 bytes mlops/unit_5_deploying/aws/variables.tf | 110 ++++++++++ mlops/unit_5_deploying/aws/vpc.tf | 11 + scripts/start.sh | 6 +- 17 files changed, 748 insertions(+), 4 deletions(-) create mode 100644 mlops/unit_5_deploying/aws/alb.tf create mode 100644 mlops/unit_5_deploying/aws/db.tf create mode 100644 mlops/unit_5_deploying/aws/efs.tf create mode 100644 mlops/unit_5_deploying/aws/env_vars.json create mode 100644 mlops/unit_5_deploying/aws/iam.tf create mode 100644 mlops/unit_5_deploying/aws/lambda.tf create mode 100644 mlops/unit_5_deploying/aws/main.tf create mode 100644 mlops/unit_5_deploying/aws/networking.tf create mode 100644 mlops/unit_5_deploying/aws/python/event_handler.py create mode 100644 mlops/unit_5_deploying/aws/python/event_handler.zip create mode 100644 mlops/unit_5_deploying/aws/variables.tf create mode 100644 mlops/unit_5_deploying/aws/vpc.tf diff --git a/.env.dev b/.env.dev index bfe1a6daf..88471e5ed 100644 --- a/.env.dev +++ b/.env.dev @@ -23,5 +23,5 @@ EXPERIMENTS_DB=experiments EXPERIMENTS_TRACKING_URI="postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:5432/${EXPERIMENTS_DB}" # Alerts -SMTP_EMAIL= -SMTP_PASSWORD= +SMTP_EMAIL=$SMTP_EMAIL +SMTP_PASSWORD=$SMTP_PASSWORD diff --git a/.gitignore b/.gitignore index 63f3eeffb..1a9eff0a2 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,8 @@ .mage_temp_profiles .preferences.yaml .ssh_tunnel +.terraform +.terraform.* .variables/ __pycache__/ docker-compose.override.yml @@ -15,4 +17,6 @@ mage-ai.db mage_data/ mlruns secrets/ +terraform.tfstate +terraform.tfstate.backup titanic_clean.csv diff --git a/Dockerfile b/Dockerfile index 1b5951949..cfcf29197 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,6 +19,6 @@ ENV USER_CODE_PATH=${USER_CODE_PATH} # Install custom Python libraries RUN pip3 install -r ${USER_CODE_PATH}/requirements.txt -ENV PYTHONPATH="${PYTHONPATH}:/home/mage_code" +ENV PYTHONPATH="${PYTHONPATH}:${MAGE_CODE_PATH}/${PROJECT_NAME}" CMD ["/bin/sh", "-c", "/app/run_app.sh"] diff --git a/mlops/metadata.yaml b/mlops/metadata.yaml index 6eadc9f70..d674645b2 100644 --- a/mlops/metadata.yaml +++ b/mlops/metadata.yaml @@ -16,3 +16,15 @@ features: polars: true project_uuid: 36404d0ffc214b8a89f598f3522c1a20 help_improve_mage: true +notification_config: + alert_on: + - trigger_failure + - trigger_passed_sla + - trigger_success + email_config: + smtp_host: smtp.gmail.com + smtp_user: "{{ env_var('SMTP_EMAIL') }}" + smtp_password: "{{ env_var('SMTP_PASSWORD') }}" + smtp_mail_from: "{{ env_var('SMTP_EMAIL') }}" + to_emails: + - "{{ env_var('SMTP_EMAIL') }}" diff --git a/mlops/unit_5_deploying/aws/alb.tf b/mlops/unit_5_deploying/aws/alb.tf new file mode 100644 index 000000000..1268c2513 --- /dev/null +++ b/mlops/unit_5_deploying/aws/alb.tf @@ -0,0 +1,82 @@ +# alb.tf | Load Balancer Configuration + +resource "aws_alb" "application_load_balancer" { + name = "${var.app_name}-${var.app_environment}-alb" + internal = false + load_balancer_type = "application" + subnets = aws_subnet.public.*.id + security_groups = [aws_security_group.load_balancer_security_group.id] + + tags = { + Name = "${var.app_name}-alb" + Environment = var.app_environment + } +} + +data "http" "myip" { + url = "http://ipv4.icanhazip.com" +} + +resource "aws_security_group" "load_balancer_security_group" { + vpc_id = aws_vpc.aws-vpc.id + + ingress { + from_port = 443 + to_port = 443 + protocol = "tcp" + cidr_blocks = ["${chomp(data.http.myip.response_body)}/32"] + } + + ingress { + from_port = 80 + to_port = 80 + protocol = "tcp" + cidr_blocks = ["${chomp(data.http.myip.response_body)}/32"] + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + ipv6_cidr_blocks = ["::/0"] + } + tags = { + Name = "${var.app_name}-sg" + Environment = var.app_environment + } +} + +resource "aws_lb_target_group" "target_group" { + name = "${var.app_name}-${var.app_environment}-tg" + port = 6789 + protocol = "HTTP" + target_type = "ip" + vpc_id = aws_vpc.aws-vpc.id + + health_check { + healthy_threshold = "3" + interval = "30" + protocol = "HTTP" + matcher = "200" + timeout = "5" + path = "/api/status" + unhealthy_threshold = "2" + } + + tags = { + Name = "${var.app_name}-lb-tg" + Environment = var.app_environment + } +} + +resource "aws_lb_listener" "listener" { + load_balancer_arn = aws_alb.application_load_balancer.id + port = "80" + protocol = "HTTP" + + default_action { + type = "forward" + target_group_arn = aws_lb_target_group.target_group.id + } +} diff --git a/mlops/unit_5_deploying/aws/db.tf b/mlops/unit_5_deploying/aws/db.tf new file mode 100644 index 000000000..f99f927db --- /dev/null +++ b/mlops/unit_5_deploying/aws/db.tf @@ -0,0 +1,83 @@ +# db.tf | Database Configuration + +resource "aws_db_subnet_group" "rds_subnet_group" { + name = "${var.app_name}-${var.app_environment}-rds-subnet-group" + description = "${var.app_name} RDS subnet group" + subnet_ids = aws_subnet.public.*.id + tags = { + Environment = var.app_environment + } +} + + +resource "aws_security_group" "rds_sg" { + name = "${var.app_name}-${var.app_environment}-rds-sg" + description = "${var.app_name} RDS Security Group" + vpc_id = aws_vpc.aws-vpc.id + + tags = { + Name = "${var.app_name}-${var.app_environment}-rds-sg" + Environment = var.app_environment + } + + // allows traffic from the SG itself + ingress { + from_port = 0 + to_port = 0 + protocol = "-1" + self = true + } + + //allow traffic for TCP 5432 + ingress { + from_port = 5432 + to_port = 5432 + protocol = "tcp" + security_groups = ["${aws_security_group.service_security_group.id}"] + } + + // outbound internet access + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } +} + +resource "aws_db_instance" "rds" { + identifier = "${var.app_name}-${var.app_environment}-db" + allocated_storage = 20 + engine = "postgres" + engine_version = "16.3" + instance_class = "db.t3.micro" + multi_az = false + db_name = "mage" + username = var.database_user // export TF_VAR_database_username="..." + password = var.database_password // export TF_VAR_database_password="..." + db_subnet_group_name = aws_db_subnet_group.rds_subnet_group.id + vpc_security_group_ids = ["${aws_security_group.rds_sg.id}"] + skip_final_snapshot = true + publicly_accessible = true + + tags = { + Environment = var.app_environment + } +} + +# Extra resources specific to this project. + +resource "null_resource" "db_setup" { + depends_on = [aws_db_instance.rds] + + provisioner "local-exec" { + command = <t#M38FjQ!(&Z|k>KeF^bZ)kW_fzV4hIbcW}?7tdL% z;J-&O}{O{f8-~aCRkC$(WdShB`P#~aOyzA|*`Sol6zcSdF z=c)flX1`L?&ZKMc=kg;rhfnwGz4lFc;ZmKWe|hA8JbO6BV~c`;>U*{nac6V$&r)rI z#foNA`OVjd9s7AiNqI)%PS`Py6lU#Rf&X-!D$8+;@9!>D51c3=IGO z2Y9n{$flp#l)=cru%3y5ApmDA31Y-jRb~ND7sL!uWU_(?APDegWD;RU#5J<{pty#C vC5<2!dd#Ei2C0FW3~~buENNT@v>jD9&_w~>tZX19Oh8x*q+bCQF)#oC3)$kO literal 0 HcmV?d00001 diff --git a/mlops/unit_5_deploying/aws/variables.tf b/mlops/unit_5_deploying/aws/variables.tf new file mode 100644 index 000000000..2b9c6a2f5 --- /dev/null +++ b/mlops/unit_5_deploying/aws/variables.tf @@ -0,0 +1,110 @@ +variable "AWS_ACCESS_KEY_ID" { + type = string + default = "AWS_ACCESS_KEY_ID" +} + +variable "AWS_SECRET_ACCESS_KEY" { + type = string + default = "AWS_SECRET_ACCESS_KEY" +} + +variable "DATABASE_CONNECTION_URL" { + type = string + default = "" +} + +variable "app_count" { + type = number + default = 1 +} + +variable "aws_region" { + type = string + description = "AWS Region" + default = "us-west-2" +} + +variable "aws_cloudwatch_retention_in_days" { + type = number + description = "AWS CloudWatch Logs Retention in Days" + default = 30 +} + +variable "app_name" { + type = string + description = "Application Name" + default = "mlops" +} + +variable "app_environment" { + type = string + description = "Application Environment" + default = "production" +} + +variable "cidr" { + description = "The CIDR block for the VPC." + default = "10.32.0.0/16" +} + +variable "database_user" { + type = string + description = "The username of the Postgres database." + default = "mageuser" +} + +variable "database_password" { + type = string + description = "The password of the Postgres database." + sensitive = true +} + +variable "docker_image" { + description = "Docker image url used in ECS task." + default = "mageai/mageai:alpha" +} + +variable "ecs_task_cpu" { + description = "ECS task cpu" + default = 512 +} + +variable "ecs_task_memory" { + description = "ECS task memory" + default = 1024 +} + +variable "public_subnets" { + description = "List of public subnets" + default = ["10.32.100.0/24", "10.32.101.0/24"] +} + +variable "private_subnets" { + description = "List of private subnets" + default = ["10.32.0.0/24", "10.32.1.0/24"] +} + +variable "availability_zones" { + description = "List of availability zones" + default = ["us-west-2a", "us-west-2b"] +} + +# Extra variables specific to this project. + +variable "experiments_database_name" { + type = string + description = "The name of the database to store experiments that track training runs." + default = "experiments" +} + +variable "smtp_email" { + type = string + description = "The email address to use for sending emails." + default = "" +} + +variable "smtp_password" { + type = string + description = "The password to use for sending emails." + default = "" +} diff --git a/mlops/unit_5_deploying/aws/vpc.tf b/mlops/unit_5_deploying/aws/vpc.tf new file mode 100644 index 000000000..bcf5f4004 --- /dev/null +++ b/mlops/unit_5_deploying/aws/vpc.tf @@ -0,0 +1,11 @@ +# vpc.tf | VPC Configuration + +resource "aws_vpc" "aws-vpc" { + cidr_block = var.cidr + enable_dns_hostnames = true + enable_dns_support = true + tags = { + Name = "${var.app_name}-vpc" + Environment = var.app_environment + } +} diff --git a/scripts/start.sh b/scripts/start.sh index 4fbaa802e..dab7207b4 100755 --- a/scripts/start.sh +++ b/scripts/start.sh @@ -3,4 +3,8 @@ # MAGE_CODE_PATH matches with the value in .env.dev # PROJECT_NAME matches with the value in .env.dev -MAGE_CODE_PATH=/home/mage_code PROJECT_NAME=mlops docker compose up +MAGE_CODE_PATH=/home/mage_code \ + PROJECT_NAME=mlops \ + SMTP_EMAIL=$SMTP_EMAIL \ + SMTP_PASSWORD=$SMTP_PASSWORD \ + docker compose up