mirror of
https://github.com/ZwareBear/awx.git
synced 2026-03-20 07:43:35 -05:00
generate control node receptor.conf
when a new remote execution/hop node is added regenerate the receptor.conf for all control node to peer out to the new remote execution node Signed-off-by: Hao Liu <haoli@redhat.com> Co-Authored-By: Seth Foster <fosterseth@users.noreply.github.com> Co-Authored-By: Shane McDonald <me@shanemcd.com>
This commit is contained in:
committed by
Jeff Bradberry
parent
4bf9925cf7
commit
9b034ad574
@@ -423,6 +423,13 @@ def on_instance_group_saved(sender, instance, created=False, raw=False, **kwargs
|
|||||||
|
|
||||||
@receiver(post_save, sender=Instance)
|
@receiver(post_save, sender=Instance)
|
||||||
def on_instance_saved(sender, instance, created=False, raw=False, **kwargs):
|
def on_instance_saved(sender, instance, created=False, raw=False, **kwargs):
|
||||||
|
# TODO: handle update to instance
|
||||||
|
if settings.IS_K8S and created and instance.node_type in ('execution', 'hop'):
|
||||||
|
from awx.main.tasks.receptor import write_receptor_config # prevents circular import
|
||||||
|
|
||||||
|
# on commit broadcast to all control instance to update their receptor configs
|
||||||
|
connection.on_commit(lambda: write_receptor_config.apply_async(queue='tower_broadcast_all'))
|
||||||
|
|
||||||
if created or instance.has_policy_changes():
|
if created or instance.has_policy_changes():
|
||||||
schedule_policy_task()
|
schedule_policy_task()
|
||||||
|
|
||||||
|
|||||||
@@ -642,10 +642,6 @@ class TaskManager(TaskBase):
|
|||||||
found_acceptable_queue = True
|
found_acceptable_queue = True
|
||||||
break
|
break
|
||||||
|
|
||||||
# TODO: remove this after we have confidence that OCP control nodes are reporting node_type=control
|
|
||||||
if settings.IS_K8S and task.capacity_type == 'execution':
|
|
||||||
logger.debug("Skipping group {}, task cannot run on control plane".format(instance_group.name))
|
|
||||||
continue
|
|
||||||
# at this point we know the instance group is NOT a container group
|
# at this point we know the instance group is NOT a container group
|
||||||
# because if it was, it would have started the task and broke out of the loop.
|
# because if it was, it would have started the task and broke out of the loop.
|
||||||
execution_instance = self.instance_groups.fit_task_to_most_remaining_capacity_instance(
|
execution_instance = self.instance_groups.fit_task_to_most_remaining_capacity_instance(
|
||||||
|
|||||||
@@ -145,7 +145,7 @@ class BaseTask(object):
|
|||||||
"""
|
"""
|
||||||
Return params structure to be executed by the container runtime
|
Return params structure to be executed by the container runtime
|
||||||
"""
|
"""
|
||||||
if settings.IS_K8S:
|
if settings.IS_K8S and instance.instance_group.is_container_group:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
image = instance.execution_environment.image
|
image = instance.execution_environment.image
|
||||||
|
|||||||
@@ -27,12 +27,17 @@ from awx.main.utils.common import (
|
|||||||
)
|
)
|
||||||
from awx.main.constants import MAX_ISOLATED_PATH_COLON_DELIMITER
|
from awx.main.constants import MAX_ISOLATED_PATH_COLON_DELIMITER
|
||||||
from awx.main.tasks.signals import signal_state, signal_callback, SignalExit
|
from awx.main.tasks.signals import signal_state, signal_callback, SignalExit
|
||||||
|
from awx.main.models import Instance
|
||||||
|
from awx.main.dispatch.publish import task
|
||||||
|
|
||||||
# Receptorctl
|
# Receptorctl
|
||||||
from receptorctl.socket_interface import ReceptorControl
|
from receptorctl.socket_interface import ReceptorControl
|
||||||
|
|
||||||
|
from filelock import FileLock
|
||||||
|
|
||||||
logger = logging.getLogger('awx.main.tasks.receptor')
|
logger = logging.getLogger('awx.main.tasks.receptor')
|
||||||
__RECEPTOR_CONF = '/etc/receptor/receptor.conf'
|
__RECEPTOR_CONF = '/etc/receptor/receptor.conf'
|
||||||
|
__RECEPTOR_CONF_LOCKFILE = f'{__RECEPTOR_CONF}.lock'
|
||||||
RECEPTOR_ACTIVE_STATES = ('Pending', 'Running')
|
RECEPTOR_ACTIVE_STATES = ('Pending', 'Running')
|
||||||
|
|
||||||
|
|
||||||
@@ -43,8 +48,10 @@ class ReceptorConnectionType(Enum):
|
|||||||
|
|
||||||
|
|
||||||
def get_receptor_sockfile():
|
def get_receptor_sockfile():
|
||||||
with open(__RECEPTOR_CONF, 'r') as f:
|
lock = FileLock(__RECEPTOR_CONF_LOCKFILE)
|
||||||
data = yaml.safe_load(f)
|
with lock:
|
||||||
|
with open(__RECEPTOR_CONF, 'r') as f:
|
||||||
|
data = yaml.safe_load(f)
|
||||||
for section in data:
|
for section in data:
|
||||||
for entry_name, entry_data in section.items():
|
for entry_name, entry_data in section.items():
|
||||||
if entry_name == 'control-service':
|
if entry_name == 'control-service':
|
||||||
@@ -60,8 +67,10 @@ def get_tls_client(use_stream_tls=None):
|
|||||||
if not use_stream_tls:
|
if not use_stream_tls:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
with open(__RECEPTOR_CONF, 'r') as f:
|
lock = FileLock(__RECEPTOR_CONF_LOCKFILE)
|
||||||
data = yaml.safe_load(f)
|
with lock:
|
||||||
|
with open(__RECEPTOR_CONF, 'r') as f:
|
||||||
|
data = yaml.safe_load(f)
|
||||||
for section in data:
|
for section in data:
|
||||||
for entry_name, entry_data in section.items():
|
for entry_name, entry_data in section.items():
|
||||||
if entry_name == 'tls-client':
|
if entry_name == 'tls-client':
|
||||||
@@ -78,12 +87,25 @@ def get_receptor_ctl():
|
|||||||
return ReceptorControl(receptor_sockfile)
|
return ReceptorControl(receptor_sockfile)
|
||||||
|
|
||||||
|
|
||||||
|
def find_node_in_mesh(node_name, receptor_ctl):
|
||||||
|
attempts = 10
|
||||||
|
backoff = 1
|
||||||
|
for attempt in range(attempts):
|
||||||
|
all_nodes = receptor_ctl.simple_command("status").get('Advertisements', None)
|
||||||
|
for node in all_nodes:
|
||||||
|
if node.get('NodeID') == node_name:
|
||||||
|
return node
|
||||||
|
else:
|
||||||
|
logger.warning(f"Instance {node_name} is not in the receptor mesh. {attempts-attempt} attempts left.")
|
||||||
|
time.sleep(backoff)
|
||||||
|
backoff += 1
|
||||||
|
else:
|
||||||
|
raise ReceptorNodeNotFound(f'Instance {node_name} is not in the receptor mesh')
|
||||||
|
|
||||||
|
|
||||||
def get_conn_type(node_name, receptor_ctl):
|
def get_conn_type(node_name, receptor_ctl):
|
||||||
all_nodes = receptor_ctl.simple_command("status").get('Advertisements', None)
|
node = find_node_in_mesh(node_name, receptor_ctl)
|
||||||
for node in all_nodes:
|
return ReceptorConnectionType(node.get('ConnType'))
|
||||||
if node.get('NodeID') == node_name:
|
|
||||||
return ReceptorConnectionType(node.get('ConnType'))
|
|
||||||
raise ReceptorNodeNotFound(f'Instance {node_name} is not in the receptor mesh')
|
|
||||||
|
|
||||||
|
|
||||||
def administrative_workunit_reaper(work_list=None):
|
def administrative_workunit_reaper(work_list=None):
|
||||||
@@ -574,3 +596,66 @@ class AWXReceptorJob:
|
|||||||
else:
|
else:
|
||||||
config["clusters"][0]["cluster"]["insecure-skip-tls-verify"] = True
|
config["clusters"][0]["cluster"]["insecure-skip-tls-verify"] = True
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
|
||||||
|
RECEPTOR_CONFIG_STARTER = (
|
||||||
|
{'control-service': {'service': 'control', 'filename': '/var/run/receptor/receptor.sock', 'permissions': '0600'}},
|
||||||
|
{'local-only': None},
|
||||||
|
{'work-command': {'worktype': 'local', 'command': 'ansible-runner', 'params': 'worker', 'allowruntimeparams': True}},
|
||||||
|
{
|
||||||
|
'work-kubernetes': {
|
||||||
|
'worktype': 'kubernetes-runtime-auth',
|
||||||
|
'authmethod': 'runtime',
|
||||||
|
'allowruntimeauth': True,
|
||||||
|
'allowruntimepod': True,
|
||||||
|
'allowruntimeparams': True,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'work-kubernetes': {
|
||||||
|
'worktype': 'kubernetes-incluster-auth',
|
||||||
|
'authmethod': 'incluster',
|
||||||
|
'allowruntimeauth': True,
|
||||||
|
'allowruntimepod': True,
|
||||||
|
'allowruntimeparams': True,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'tls-client': {
|
||||||
|
'name': 'tlsclient',
|
||||||
|
'rootcas': '/etc/receptor/tls/ca/receptor-ca.crt',
|
||||||
|
'cert': '/etc/receptor/tls/receptor.crt',
|
||||||
|
'key': '/etc/receptor/tls/receptor.key',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@task()
|
||||||
|
def write_receptor_config():
|
||||||
|
receptor_config = list(RECEPTOR_CONFIG_STARTER)
|
||||||
|
|
||||||
|
instances = Instance.objects.exclude(node_type='control')
|
||||||
|
for instance in instances:
|
||||||
|
peer = {'tcp-peer': {'address': f'{instance.hostname}:{instance.listener_port}', 'tls': 'tlsclient'}}
|
||||||
|
receptor_config.append(peer)
|
||||||
|
|
||||||
|
lock = FileLock(__RECEPTOR_CONF_LOCKFILE)
|
||||||
|
with lock:
|
||||||
|
with open(__RECEPTOR_CONF, 'w') as file:
|
||||||
|
yaml.dump(receptor_config, file, default_flow_style=False)
|
||||||
|
|
||||||
|
receptor_ctl = get_receptor_ctl()
|
||||||
|
|
||||||
|
attempts = 10
|
||||||
|
backoff = 1
|
||||||
|
for attempt in range(attempts):
|
||||||
|
try:
|
||||||
|
receptor_ctl.simple_command("reload")
|
||||||
|
break
|
||||||
|
except ValueError:
|
||||||
|
logger.warning(f"Unable to reload Receptor configuration. {attempts-attempt} attempts left.")
|
||||||
|
time.sleep(backoff)
|
||||||
|
backoff += 1
|
||||||
|
else:
|
||||||
|
raise RuntimeError("Receptor reload failed")
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ from awx.main.utils.common import (
|
|||||||
from awx.main.utils.external_logging import reconfigure_rsyslog
|
from awx.main.utils.external_logging import reconfigure_rsyslog
|
||||||
from awx.main.utils.reload import stop_local_services
|
from awx.main.utils.reload import stop_local_services
|
||||||
from awx.main.utils.pglock import advisory_lock
|
from awx.main.utils.pglock import advisory_lock
|
||||||
from awx.main.tasks.receptor import get_receptor_ctl, worker_info, worker_cleanup, administrative_workunit_reaper
|
from awx.main.tasks.receptor import get_receptor_ctl, worker_info, worker_cleanup, administrative_workunit_reaper, write_receptor_config
|
||||||
from awx.main.consumers import emit_channel_notification
|
from awx.main.consumers import emit_channel_notification
|
||||||
from awx.main import analytics
|
from awx.main import analytics
|
||||||
from awx.conf import settings_registry
|
from awx.conf import settings_registry
|
||||||
@@ -81,6 +81,10 @@ Try upgrading OpenSSH or providing your private key in an different format. \
|
|||||||
def dispatch_startup():
|
def dispatch_startup():
|
||||||
startup_logger = logging.getLogger('awx.main.tasks')
|
startup_logger = logging.getLogger('awx.main.tasks')
|
||||||
|
|
||||||
|
# TODO: Enable this on VM installs
|
||||||
|
if settings.IS_K8S:
|
||||||
|
write_receptor_config()
|
||||||
|
|
||||||
startup_logger.debug("Syncing Schedules")
|
startup_logger.debug("Syncing Schedules")
|
||||||
for sch in Schedule.objects.all():
|
for sch in Schedule.objects.all():
|
||||||
try:
|
try:
|
||||||
@@ -555,7 +559,7 @@ def cluster_node_heartbeat(dispatch_time=None, worker_tasks=None):
|
|||||||
except Exception:
|
except Exception:
|
||||||
logger.exception('failed to reap jobs for {}'.format(other_inst.hostname))
|
logger.exception('failed to reap jobs for {}'.format(other_inst.hostname))
|
||||||
try:
|
try:
|
||||||
if settings.AWX_AUTO_DEPROVISION_INSTANCES:
|
if settings.AWX_AUTO_DEPROVISION_INSTANCES and other_inst.node_type == "control":
|
||||||
deprovision_hostname = other_inst.hostname
|
deprovision_hostname = other_inst.hostname
|
||||||
other_inst.delete() # FIXME: what about associated inbound links?
|
other_inst.delete() # FIXME: what about associated inbound links?
|
||||||
logger.info("Host {} Automatically Deprovisioned.".format(deprovision_hostname))
|
logger.info("Host {} Automatically Deprovisioned.".format(deprovision_hostname))
|
||||||
|
|||||||
24
docs/licenses/filelock.txt
Normal file
24
docs/licenses/filelock.txt
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
This is free and unencumbered software released into the public domain.
|
||||||
|
|
||||||
|
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||||
|
distribute this software, either in source code form or as a compiled
|
||||||
|
binary, for any purpose, commercial or non-commercial, and by any
|
||||||
|
means.
|
||||||
|
|
||||||
|
In jurisdictions that recognize copyright laws, the author or authors
|
||||||
|
of this software dedicate any and all copyright interest in the
|
||||||
|
software to the public domain. We make this dedication for the benefit
|
||||||
|
of the public at large and to the detriment of our heirs and
|
||||||
|
successors. We intend this dedication to be an overt act of
|
||||||
|
relinquishment in perpetuity of all present and future rights to this
|
||||||
|
software under copyright law.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||||
|
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||||
|
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
For more information, please refer to <http://unlicense.org>
|
||||||
@@ -25,6 +25,7 @@ django-split-settings
|
|||||||
django-taggit
|
django-taggit
|
||||||
djangorestframework==3.13.1
|
djangorestframework==3.13.1
|
||||||
djangorestframework-yaml
|
djangorestframework-yaml
|
||||||
|
filelock
|
||||||
GitPython>=3.1.1 # minimum to fix https://github.com/ansible/awx/issues/6119
|
GitPython>=3.1.1 # minimum to fix https://github.com/ansible/awx/issues/6119
|
||||||
irc
|
irc
|
||||||
jinja2>=2.11.3 # CVE-2020-28493
|
jinja2>=2.11.3 # CVE-2020-28493
|
||||||
|
|||||||
@@ -132,6 +132,8 @@ docutils==0.16
|
|||||||
# via python-daemon
|
# via python-daemon
|
||||||
ecdsa==0.18.0
|
ecdsa==0.18.0
|
||||||
# via python-jose
|
# via python-jose
|
||||||
|
filelock==3.8.0
|
||||||
|
# via -r /awx_devel/requirements/requirements.in
|
||||||
# via
|
# via
|
||||||
# -r /awx_devel/requirements/requirements_git.txt
|
# -r /awx_devel/requirements/requirements_git.txt
|
||||||
# django-radius
|
# django-radius
|
||||||
|
|||||||
@@ -109,6 +109,20 @@
|
|||||||
mode: '0600'
|
mode: '0600'
|
||||||
with_sequence: start=1 end={{ control_plane_node_count }}
|
with_sequence: start=1 end={{ control_plane_node_count }}
|
||||||
|
|
||||||
|
- name: Create Receptor Config Lock File
|
||||||
|
file:
|
||||||
|
path: "{{ sources_dest }}/receptor/receptor-awx-{{ item }}.conf.lock"
|
||||||
|
state: touch
|
||||||
|
mode: '0600'
|
||||||
|
with_sequence: start=1 end={{ control_plane_node_count }}
|
||||||
|
|
||||||
|
- name: Render Receptor Config(s) for Control Plane
|
||||||
|
template:
|
||||||
|
src: "receptor-awx.conf.j2"
|
||||||
|
dest: "{{ sources_dest }}/receptor/receptor-awx-{{ item }}.conf"
|
||||||
|
mode: '0600'
|
||||||
|
with_sequence: start=1 end={{ control_plane_node_count }}
|
||||||
|
|
||||||
- name: Render Receptor Hop Config
|
- name: Render Receptor Hop Config
|
||||||
template:
|
template:
|
||||||
src: "receptor-hop.conf.j2"
|
src: "receptor-hop.conf.j2"
|
||||||
|
|||||||
@@ -42,6 +42,7 @@ services:
|
|||||||
- "../../docker-compose/_sources/local_settings.py:/etc/tower/conf.d/local_settings.py"
|
- "../../docker-compose/_sources/local_settings.py:/etc/tower/conf.d/local_settings.py"
|
||||||
- "../../docker-compose/_sources/SECRET_KEY:/etc/tower/SECRET_KEY"
|
- "../../docker-compose/_sources/SECRET_KEY:/etc/tower/SECRET_KEY"
|
||||||
- "../../docker-compose/_sources/receptor/receptor-awx-{{ loop.index }}.conf:/etc/receptor/receptor.conf"
|
- "../../docker-compose/_sources/receptor/receptor-awx-{{ loop.index }}.conf:/etc/receptor/receptor.conf"
|
||||||
|
- "../../docker-compose/_sources/receptor/receptor-awx-{{ loop.index }}.conf.lock:/etc/receptor/receptor.conf.lock"
|
||||||
- "../../docker-compose/_sources/receptor/work_public_key.pem:/etc/receptor/work_public_key.pem"
|
- "../../docker-compose/_sources/receptor/work_public_key.pem:/etc/receptor/work_public_key.pem"
|
||||||
- "../../docker-compose/_sources/receptor/work_private_key.pem:/etc/receptor/work_private_key.pem"
|
- "../../docker-compose/_sources/receptor/work_private_key.pem:/etc/receptor/work_private_key.pem"
|
||||||
# - "../../docker-compose/_sources/certs:/etc/receptor/certs" # TODO: optionally generate certs
|
# - "../../docker-compose/_sources/certs:/etc/receptor/certs" # TODO: optionally generate certs
|
||||||
|
|||||||
Reference in New Issue
Block a user