From a0840ddec25024911b863f58684245d8f2d29a02 Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Wed, 12 May 2021 10:31:18 -0400 Subject: [PATCH 1/5] Enforce EE pull policy on Container Groups --- awx/main/tasks.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/awx/main/tasks.py b/awx/main/tasks.py index ece5455693..34e6ca6215 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -3074,6 +3074,11 @@ class AWXReceptorJob: pod_spec['spec']['containers'][0]['image'] = ee.image pod_spec['spec']['containers'][0]['args'] = ['ansible-runner', 'worker', '--private-data-dir=/runner'] + # Enforce EE Pull Policy + pull_options = {"always": "Always", "missing": "IfNotPresent", "never": "Never"} + if self.task and self.task.instance.execution_environment: + pod_spec['spec']['containers'][0]['imagePullPolicy'] = pull_options[self.task.instance.execution_environment.pull] + if self.task: pod_spec['metadata'] = deepmerge( pod_spec.get('metadata', {}), From 8316a1d198cadef83b9165dcb0f43c1991e3d113 Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Wed, 12 May 2021 15:19:38 -0400 Subject: [PATCH 2/5] Create pull secret in cluster and use it in PodSpec - base64 encode secret values before creating the secret - Construct valid .dockerconfigjson - Cancel jobs where it will obviously fail & error handling - Check if the secret exists first, then attempt to replace it if it does. 
--- awx/main/scheduler/kubernetes.py | 67 ++++++++++++++++++++++++++++++++ awx/main/tasks.py | 15 ++++++- 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/awx/main/scheduler/kubernetes.py b/awx/main/scheduler/kubernetes.py index 0313d463a0..d68aa5cb6e 100644 --- a/awx/main/scheduler/kubernetes.py +++ b/awx/main/scheduler/kubernetes.py @@ -1,4 +1,5 @@ import collections +import json import logging from base64 import b64encode @@ -51,6 +52,72 @@ class PodManager(object): return pods + @classmethod + def create_secret(self, job): + task = collections.namedtuple('Task', 'id instance_group')(id='', instance_group=job.instance_group) + pm = PodManager(task) + registry_cred = job.execution_environment.credential + host = registry_cred.get_input('host').split('/')[0] + username = registry_cred.get_input("username") + password = registry_cred.get_input("password") + + # Construct container auth dict and base64 encode it + token = b64encode("{}:{}".format(username, password).encode('ascii')).decode() + auth_dict = json.dumps({"auths": {host: {"auth": token}}}, indent=4) + auth_data = b64encode(str(auth_dict).encode('ascii')).decode() + + # Construct Secret object + secret = client.V1Secret() + secret_name = "automation-{0}-image-pull-secret-{1}".format(settings.INSTALL_UUID[:5], job.execution_environment.credential.id) + secret.metadata = client.V1ObjectMeta(name="{}".format(secret_name)) + secret.type = "kubernetes.io/dockerconfigjson" + secret.kind = "Secret" + secret.data = {".dockerconfigjson": auth_data} + + # Check if secret already exists + secrets = None + try: + secrets = pm.kube_api.list_namespaced_secret(namespace=pm.namespace) + except client.rest.ApiException: + error_msg = 'Invalid openshift or k8s cluster credential' + logger.exception(error_msg) + job.cancel(job_explanation=error_msg) + raise + + if secrets: + secret_exists = False + secrets_dict = secrets.to_dict().get('items', []) + for s in secrets_dict: + if s['metadata']['name'] == 
secret_name: + secret_exists = True + if secret_exists: + try: + # Try to replace existing secret + pm.kube_api.delete_namespaced_secret(name=secret.metadata.name, namespace=pm.namespace) + pm.kube_api.create_namespaced_secret(namespace=pm.namespace, body=secret) + except Exception: + error_msg = 'Failed to create imagePullSecret for container group {}'.format(task.instance_group.name) + logger.exception(error_msg) + job.cancel(job_explanation=error_msg) + raise + else: + # Create an image pull secret in namespace + try: + pm.kube_api.create_namespaced_secret(namespace=pm.namespace, body=secret) + except client.rest.ApiException as e: + if e.status == 401: + error_msg = 'Failed to create imagePullSecret: {}. Check that openshift or k8s credential has permission to create a secret.'.format( + e.status + ) + logger.exception(error_msg) + # let job run for the case that the secret exists but the cluster cred doesn't have permission to create a secret + except Exception: + error_msg = 'Failed to create imagePullSecret for container group {}'.format(task.instance_group.name) + logger.exception(error_msg) + job.cancel(job_explanation=error_msg) + + return secret.metadata.name + @property def namespace(self): return self.pod_definition['metadata']['namespace'] diff --git a/awx/main/tasks.py b/awx/main/tasks.py index 34e6ca6215..e81aef40c9 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -3077,7 +3077,20 @@ class AWXReceptorJob: # Enforce EE Pull Policy pull_options = {"always": "Always", "missing": "IfNotPresent", "never": "Never"} if self.task and self.task.instance.execution_environment: - pod_spec['spec']['containers'][0]['imagePullPolicy'] = pull_options[self.task.instance.execution_environment.pull] + if self.task.instance.execution_environment.pull: + pod_spec['spec']['containers'][0]['imagePullPolicy'] = pull_options[self.task.instance.execution_environment.pull] + + if self.task and self.task.instance.is_container_group_task: + # If EE credential is 
passed, create an imagePullSecret + if self.task.instance.execution_environment and self.task.instance.execution_environment.credential: + # Create pull secret in k8s cluster based on ee cred + from awx.main.scheduler.kubernetes import PodManager # prevent circular import + + pm = PodManager(self.task.instance) + secret_name = pm.create_secret(job=self.task.instance) + + # Inject secret name into podspec + pod_spec['spec']['imagePullSecrets'] = [{"name": secret_name}] if self.task: pod_spec['metadata'] = deepmerge( From cea6d8c3cb4378ff46b64ff6fdfd7a029def3be1 Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Thu, 20 May 2021 11:47:52 -0400 Subject: [PATCH 3/5] Use utf-8 & properly parse hostname from registry URL --- awx/main/scheduler/kubernetes.py | 33 +++++++++++++++++++----------- awx/main/tasks.py | 2 +- awx/main/utils/external_logging.py | 2 +- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/awx/main/scheduler/kubernetes.py b/awx/main/scheduler/kubernetes.py index d68aa5cb6e..7db008f805 100644 --- a/awx/main/scheduler/kubernetes.py +++ b/awx/main/scheduler/kubernetes.py @@ -2,6 +2,7 @@ import collections import json import logging from base64 import b64encode +from urllib import parse as urlparse from django.conf import settings from kubernetes import client, config @@ -52,19 +53,26 @@ class PodManager(object): return pods - @classmethod def create_secret(self, job): - task = collections.namedtuple('Task', 'id instance_group')(id='', instance_group=job.instance_group) - pm = PodManager(task) registry_cred = job.execution_environment.credential - host = registry_cred.get_input('host').split('/')[0] + host = registry_cred.get_input('host') + scheme = 'https' + # urlparse requires '//' to be provided if scheme is not specified + original_parsed = urlparse.urlsplit(host) + if (not original_parsed.scheme and not host.startswith('//')) or original_parsed.hostname is None: + host = '%s://%s' % (scheme, host) + parsed = 
urlparse.urlsplit(host) + host = parsed.hostname + if parsed.port: + host = "{0}:{1}".format(host, parsed.port) + username = registry_cred.get_input("username") password = registry_cred.get_input("password") # Construct container auth dict and base64 encode it - token = b64encode("{}:{}".format(username, password).encode('ascii')).decode() + token = b64encode("{}:{}".format(username, password).encode('UTF-8')).decode() auth_dict = json.dumps({"auths": {host: {"auth": token}}}, indent=4) - auth_data = b64encode(str(auth_dict).encode('ascii')).decode() + auth_data = b64encode(str(auth_dict).encode('UTF-8')).decode() # Construct Secret object secret = client.V1Secret() @@ -77,7 +85,7 @@ class PodManager(object): # Check if secret already exists secrets = None try: - secrets = pm.kube_api.list_namespaced_secret(namespace=pm.namespace) + secrets = self.kube_api.list_namespaced_secret(namespace=self.namespace) except client.rest.ApiException: error_msg = 'Invalid openshift or k8s cluster credential' logger.exception(error_msg) @@ -90,20 +98,21 @@ class PodManager(object): for s in secrets_dict: if s['metadata']['name'] == secret_name: secret_exists = True + break if secret_exists: try: # Try to replace existing secret - pm.kube_api.delete_namespaced_secret(name=secret.metadata.name, namespace=pm.namespace) - pm.kube_api.create_namespaced_secret(namespace=pm.namespace, body=secret) + self.kube_api.delete_namespaced_secret(name=secret.metadata.name, namespace=self.namespace) + self.kube_api.create_namespaced_secret(namespace=self.namespace, body=secret) except Exception: - error_msg = 'Failed to create imagePullSecret for container group {}'.format(task.instance_group.name) + error_msg = 'Failed to create imagePullSecret for container group {}'.format(job.instance_group.name) logger.exception(error_msg) job.cancel(job_explanation=error_msg) raise else: # Create an image pull secret in namespace try: - pm.kube_api.create_namespaced_secret(namespace=pm.namespace, 
body=secret) + self.kube_api.create_namespaced_secret(namespace=self.namespace, body=secret) except client.rest.ApiException as e: if e.status == 401: error_msg = 'Failed to create imagePullSecret: {}. Check that openshift or k8s credential has permission to create a secret.'.format( @@ -112,7 +121,7 @@ class PodManager(object): logger.exception(error_msg) # let job run for the case that the secret exists but the cluster cred doesn't have permission to create a secret except Exception: - error_msg = 'Failed to create imagePullSecret for container group {}'.format(task.instance_group.name) + error_msg = 'Failed to create imagePullSecret for container group {}'.format(job.instance_group.name) logger.exception(error_msg) job.cancel(job_explanation=error_msg) diff --git a/awx/main/tasks.py b/awx/main/tasks.py index e81aef40c9..fe234d7391 100644 --- a/awx/main/tasks.py +++ b/awx/main/tasks.py @@ -842,7 +842,7 @@ class BaseTask(object): username = cred.get_input('username') password = cred.get_input('password') token = "{}:{}".format(username, password) - auth_data = {'auths': {host: {'auth': b64encode(token.encode('ascii')).decode()}}} + auth_data = {'auths': {host: {'auth': b64encode(token.encode('UTF-8')).decode('UTF-8')}}} authfile.write(json.dumps(auth_data, indent=4)) params["container_options"].append(f'--authfile={authfile.name}') else: diff --git a/awx/main/utils/external_logging.py b/awx/main/utils/external_logging.py index 96acc371ea..3a86d24df7 100644 --- a/awx/main/utils/external_logging.py +++ b/awx/main/utils/external_logging.py @@ -51,7 +51,7 @@ def construct_rsyslog_conf_template(settings=settings): # urlparse requires '//' to be provided if scheme is not specified original_parsed = urlparse.urlsplit(host) if (not original_parsed.scheme and not host.startswith('//')) or original_parsed.hostname is None: - host = '%s://%s' % (scheme, host) if scheme else '//%s' % host + host = '%s://%s' % (scheme, host) parsed = urlparse.urlsplit(host) host = 
escape_quotes(parsed.hostname) From d607dfd5d8c17a1990f2cef84dc60ae403970016 Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Mon, 24 May 2021 09:40:37 -0400 Subject: [PATCH 4/5] Added error handling for pull secret creation requests - Check (only) the existing secret to see if its value would change. --- awx/main/scheduler/kubernetes.py | 95 +++++++++++++++++++------------- 1 file changed, 58 insertions(+), 37 deletions(-) diff --git a/awx/main/scheduler/kubernetes.py b/awx/main/scheduler/kubernetes.py index 7db008f805..dd27402b00 100644 --- a/awx/main/scheduler/kubernetes.py +++ b/awx/main/scheduler/kubernetes.py @@ -83,47 +83,68 @@ class PodManager(object): secret.data = {".dockerconfigjson": auth_data} # Check if secret already exists - secrets = None + replace_secret = False try: - secrets = self.kube_api.list_namespaced_secret(namespace=self.namespace) - except client.rest.ApiException: - error_msg = 'Invalid openshift or k8s cluster credential' - logger.exception(error_msg) - job.cancel(job_explanation=error_msg) - raise - - if secrets: - secret_exists = False - secrets_dict = secrets.to_dict().get('items', []) - for s in secrets_dict: - if s['metadata']['name'] == secret_name: - secret_exists = True - break - if secret_exists: - try: - # Try to replace existing secret - self.kube_api.delete_namespaced_secret(name=secret.metadata.name, namespace=self.namespace) - self.kube_api.create_namespaced_secret(namespace=self.namespace, body=secret) - except Exception: - error_msg = 'Failed to create imagePullSecret for container group {}'.format(job.instance_group.name) - logger.exception(error_msg) - job.cancel(job_explanation=error_msg) - raise + existing_secret = self.kube_api.read_namespaced_secret(namespace=self.namespace, name=secret_name) + if existing_secret.data != secret.data: + replace_secret = True + secret_exists = True + except client.rest.ApiException as e: + if e.status == 404: + secret_exists = False else: - # Create an image pull secret in 
namespace - try: - self.kube_api.create_namespaced_secret(namespace=self.namespace, body=secret) - except client.rest.ApiException as e: - if e.status == 401: - error_msg = 'Failed to create imagePullSecret: {}. Check that openshift or k8s credential has permission to create a secret.'.format( - e.status + error_msg = _('Invalid openshift or k8s cluster credential') + if e.status == 403: + error_msg = _( + 'Failed to create secret for container group {} because the needed service account roles are needed. Add get, list, create and delete roles for secret resources for your cluster credential.'.format( + job.instance_group.name ) - logger.exception(error_msg) - # let job run for the case that the secret exists but the cluster cred doesn't have permission to create a secret - except Exception: - error_msg = 'Failed to create imagePullSecret for container group {}'.format(job.instance_group.name) + ) + full_error_msg = '{0}: {1}'.format(error_msg, str(e)) + logger.exception(full_error_msg) + job.job_explanation = error_msg + job.save() + raise PermissionError(full_error_msg) + + if replace_secret: + try: + # Try to replace existing secret + self.kube_api.delete_namespaced_secret(name=secret.metadata.name, namespace=self.namespace) + self.kube_api.create_namespaced_secret(namespace=self.namespace, body=secret) + except client.rest.ApiException as e: + error_msg = _('Invalid openshift or k8s cluster credential') + if e.status == 403: + error_msg = _( + 'Failed to delete secret for container group {} because the needed service account roles are needed. 
Add create and delete roles for secret resources for your cluster credential.'.format( + job.instance_group.name + ) + ) + full_error_msg = '{0}: {1}'.format(error_msg, str(e)) + logger.exception(full_error_msg) + job.job_explanation = error_msg + job.save() + # let job continue for the case where secret was created manually and cluster cred doesn't have permission to create a secret + except Exception as e: + error_msg = 'Failed to create imagePullSecret for container group {}'.format(job.instance_group.name) + logger.exception('{0}: {1}'.format(error_msg, str(e))) + raise RuntimeError(error_msg) + elif secret_exists and not replace_secret: + pass + else: + # Create an image pull secret in namespace + try: + self.kube_api.create_namespaced_secret(namespace=self.namespace, body=secret) + except client.rest.ApiException as e: + if e.status == 403: + error_msg = _( + 'Failed to create imagePullSecret: {}. Check that openshift or k8s credential has permission to create a secret.'.format(e.status) + ) logger.exception(error_msg) - job.cancel(job_explanation=error_msg) + # let job continue for the case where secret was created manually and cluster cred doesn't have permission to create a secret + except Exception: + error_msg = 'Failed to create imagePullSecret for container group {}'.format(job.instance_group.name) + logger.exception(error_msg) + job.cancel(job_explanation=error_msg) return secret.metadata.name From 536c02dc55f8259c9aecbbc85473c7246d5d954e Mon Sep 17 00:00:00 2001 From: "Christian M. 
Adams" Date: Tue, 25 May 2021 15:13:56 -0400 Subject: [PATCH 5/5] Simplify hostname parsing --- awx/main/scheduler/kubernetes.py | 8 ++------ awx/main/utils/external_logging.py | 3 +-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/awx/main/scheduler/kubernetes.py b/awx/main/scheduler/kubernetes.py index dd27402b00..514f9e4624 100644 --- a/awx/main/scheduler/kubernetes.py +++ b/awx/main/scheduler/kubernetes.py @@ -7,6 +7,7 @@ from urllib import parse as urlparse from django.conf import settings from kubernetes import client, config from django.utils.functional import cached_property +from django.utils.translation import ugettext_lazy as _ from awx.main.utils.common import parse_yaml_or_json from awx.main.utils.execution_environments import get_default_pod_spec @@ -56,11 +57,10 @@ class PodManager(object): def create_secret(self, job): registry_cred = job.execution_environment.credential host = registry_cred.get_input('host') - scheme = 'https' # urlparse requires '//' to be provided if scheme is not specified original_parsed = urlparse.urlsplit(host) if (not original_parsed.scheme and not host.startswith('//')) or original_parsed.hostname is None: - host = '%s://%s' % (scheme, host) + host = 'https://%s' % (host) parsed = urlparse.urlsplit(host) host = parsed.hostname if parsed.port: @@ -102,8 +102,6 @@ class PodManager(object): ) full_error_msg = '{0}: {1}'.format(error_msg, str(e)) logger.exception(full_error_msg) - job.job_explanation = error_msg - job.save() raise PermissionError(full_error_msg) if replace_secret: @@ -121,8 +119,6 @@ class PodManager(object): ) full_error_msg = '{0}: {1}'.format(error_msg, str(e)) logger.exception(full_error_msg) - job.job_explanation = error_msg - job.save() # let job continue for the case where secret was created manually and cluster cred doesn't have permission to create a secret except Exception as e: error_msg = 'Failed to create imagePullSecret for container group {}'.format(job.instance_group.name) diff 
--git a/awx/main/utils/external_logging.py b/awx/main/utils/external_logging.py index 3a86d24df7..26f434a4e4 100644 --- a/awx/main/utils/external_logging.py +++ b/awx/main/utils/external_logging.py @@ -47,11 +47,10 @@ def construct_rsyslog_conf_template(settings=settings): return tmpl if protocol.startswith('http'): - scheme = 'https' # urlparse requires '//' to be provided if scheme is not specified original_parsed = urlparse.urlsplit(host) if (not original_parsed.scheme and not host.startswith('//')) or original_parsed.hostname is None: - host = '%s://%s' % (scheme, host) + host = 'https://%s' % (host) parsed = urlparse.urlsplit(host) host = escape_quotes(parsed.hostname)