From 6e08c3567fdd482dc658576f7ae7f569585b7c6d Mon Sep 17 00:00:00 2001 From: Alan Rominger Date: Tue, 6 Dec 2022 14:43:13 -0500 Subject: [PATCH 1/6] Add missing disassociate trigger for policy task --- awx/api/views/mixin.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/awx/api/views/mixin.py b/awx/api/views/mixin.py index 63762c1634..a058d6ce9a 100644 --- a/awx/api/views/mixin.py +++ b/awx/api/views/mixin.py @@ -16,7 +16,7 @@ from rest_framework import status from awx.main.constants import ACTIVE_STATES from awx.main.utils import get_object_or_400 -from awx.main.models.ha import Instance, InstanceGroup +from awx.main.models.ha import Instance, InstanceGroup, schedule_policy_task from awx.main.models.organization import Team from awx.main.models.projects import Project from awx.main.models.inventory import Inventory @@ -107,6 +107,11 @@ class InstanceGroupMembershipMixin(object): if inst_name in ig_obj.policy_instance_list: ig_obj.policy_instance_list.pop(ig_obj.policy_instance_list.index(inst_name)) ig_obj.save(update_fields=['policy_instance_list']) + + # sometimes removing an instance has a non-obvious consequence + # this is almost always true if policy_instance_percentage or _minimum is non-zero + # after removing a single instance, the other memberships need to be re-balanced + schedule_policy_task() return response From 4a7f4d0ed41ff12f33d58d3bda902bcc23167ecb Mon Sep 17 00:00:00 2001 From: Alan Rominger Date: Tue, 6 Dec 2022 15:20:04 -0500 Subject: [PATCH 2/6] Remove uneditable Instance fields from API browser --- awx/api/views/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/awx/api/views/__init__.py b/awx/api/views/__init__.py index 525c449bc0..533f617993 100644 --- a/awx/api/views/__init__.py +++ b/awx/api/views/__init__.py @@ -344,6 +344,13 @@ class InstanceDetail(RetrieveUpdateAPIView): model = models.Instance serializer_class = serializers.InstanceSerializer + def update_raw_data(self, data): + # these fields are only valid on creation of an instance, so they unwanted on detail view + data.pop('listener_port', None) + data.pop('node_type', None) + data.pop('hostname', None) + return super(InstanceDetail, self).update_raw_data(data) + def update(self, request, *args, **kwargs): r = super(InstanceDetail, self).update(request, *args, **kwargs) if status.is_success(r.status_code): From 916b5642d29774092d5ceba28b63d23a9b87a7bd Mon Sep 17 00:00:00 2001 From: Seth Foster Date: Thu, 8 Dec 2022 16:03:58 -0500 Subject: [PATCH 3/6] Update task manager docs - DependencyManager and WorkflowManager - bulk reschedule - global task manager timeout - blocking logic Co-authored-by: Elijah DeLee Co-authored-by: John R Barker --- docs/task_manager_system.md | 140 +++++++++++++++++++++++++++++++----- 1 file changed, 122 insertions(+), 18 deletions(-) diff --git a/docs/task_manager_system.md b/docs/task_manager_system.md index f44d2069df..8d50404a5f 100644 --- a/docs/task_manager_system.md +++ b/docs/task_manager_system.md @@ -1,18 +1,108 @@ -# Task Manager Overview +# Task Manager System Overview -The task manager is responsible for deciding when jobs should be scheduled to run. When choosing a task to run, the considerations are: +The task management system is made up of three separate components: +1. Dependency Manager +2. Task Manager +3. Workflow Manager + +Each of these run in a separate dispatched task and can run at the same time as one another. + +This system is responsible for deciding when tasks should be scheduled to run. When choosing a task to run, the considerations are: 1. Creation time 2. Job dependencies 3. Capacity -Independent jobs are run in order of creation time, earliest first. Jobs with dependencies are also run in creation time order within the group of job dependencies. Capacity is the final consideration when deciding to release a job to be run by the task dispatcher. +Independent tasks are run in order of creation time, earliest first. Tasks with dependencies are also run in creation time order within the group of task dependencies. Capacity is the final consideration when deciding to release a task to be run by the dispatcher. -## Task Manager Architecture -The task manager has a single entry point, `Scheduler().schedule()`. The method may be called in parallel, at any time, as many times as the user wants. The `schedule()` function tries to acquire a single, global lock using the Instance table first recorded in the database. If the lock cannot be acquired, the method returns. The failure to acquire the lock indicates that there is another instance currently running `schedule()`. +## Dependency Manager + +Responsible for looking at each pending task and determining whether it should create a dependency for that task. +For example, if `update_on_launch` is enabled of a task, a project update will be created as a dependency of that task. The Dependency Manager is responsible for creating that project update. + +Dependencies can also have their own dependencies, for example, + +``` ++-----------+ +| | created by web API call +| Job A | +| | ++-----------+---+ + | + | + +-------v----+ + | Inventory | dependency of Job A + | Source | created by Dependency Manager + | Update B | + +------------+-------+ + | + | + +------v------+ + | Project | dependency of Inventory Source Update B + | Update C | created by Dependency Manager + +-------------+ +``` + + +### Dependency Manager Steps + +1. Get pending tasks (parent tasks) that have `dependencies_processed = False` +2. Create project update if + a. not already created + b. last project update outside of cache timeout window +3. Create inventory source update if + a. not already created + b. last inventory source update outside of cache timeout window +4. Check and create dependencies for these newly created dependencies + a. inventory source updates can have a project update dependency +5. All dependencies are linked to the parent task via the `dependent_jobs` field + a. This allows us to cancel the parent task if the dependency fails or is canceled +6. Update the parent tasks with `dependencies_processed = True` + + +## Task Manager + +Responsible for looking at each pending task and determining whether Task Manager can start that task. + +### Task Manager Steps + +1. Get pending, waiting, and running tasks that have `dependencies_processed = True` +2. Before processing pending tasks, the task manager first processes running tasks. This allows it to build a dependency graph and account for the currently consumed capacity in the system. + a. dependency graph is just an internal data structure that tracks which jobs are currently running. It also handles "soft" blocking logic + b. the capacity is tracked in memory on the `TaskManagerInstances` and `TaskManagerInstanceGroups` objects which are in-memory representations of the instances and instance groups. These data structures are used to help track what consumed capacity will be as we decide that we will start new tasks, and until such time that we actually commit the state changes to the database. +3. For each pending task: + a. Check if total number of tasks started on this task manager cycle is > `start_task_limit` + b. Check if [timed out](#Timing Out) + b. Check if task is blocked + c. Check if preferred instances have enough capacity to run the task +4. Start the task by changing status to `waiting` and submitting task to dispatcher + + +## Workflow Manager + +Responsible for looking at each workflow job and determining if next node can run + +### Worflow Manager Steps + +1. Get all running workflow jobs +2. Build up a workflow DAG for each workflow job +3. For each workflow job: + a. Check if [timed out](#Timing Out) + b. Check if next node can start based on previous node status and the associated success / failure / always logic +4. Create new task and signal start + + +## Task Manager System Architecture + +Each of the three managers has a single entry point, `schedule()`. The `schedule()` function tries to acquire a single, global lock recorded in the database. If the lock cannot be acquired, the method returns. The failure to acquire the lock indicates that there is another instance currently running `schedule()`. + +Each manager runs inside of an atomic DB transaction. If the dispatcher task that is running the manager is killed, none of the created tasks or updates will take effect. ### Hybrid Scheduler: Periodic + Event -The `schedule()` function is run (a) periodically by a background task and (b) on job creation or completion. The task manager system would behave correctly if it ran, exclusively, via (a) or (b). + +Each manager's `schedule()` function is run (a) periodically by a background task and (b) on job creation or completion. The task manager system would behave correctly if it ran, exclusively, via (a) or (b). + +Special note -- the workflow manager is not scheduled to run periodically *directly*, but piggy-backs off the task manager. That is, if task manager sees at least one running workflow job, it will schedule the workflow manager to run. `schedule()` is triggered via both mechanisms because of the following properties: 1. It reduces the time from launch to running, resulting a better user experience. @@ -20,21 +110,34 @@ The `schedule()` function is run (a) periodically by a background task and (b) o Empirically, the periodic task manager has been effective in the past and will continue to be relied upon with the added event-triggered `schedule()`. -### Scheduler Algorithm +### Bulk Reschedule - * Get all non-completed jobs, `all_tasks` - * Detect finished workflow jobs - * Spawn next workflow jobs if needed - * For each pending job, start with the oldest created job - * If the job is not blocked, and there is capacity in the instance group queue, then mark it as `waiting` and submit the job. +Typically each manager is ran asynchronously via the dispatcher system. Dispatcher tasks take resources, so it is important to not schedule tasks unnecessarily. We also need a mechanism to run the manager *after* an atomic transaction block. + +Scheduling the managers are facilitated through the `ScheduleTaskManager`, `ScheduleDependencyManager`, and `ScheduleWorkflowManager` classes. These are utilities that help prevent too many managers from being started via the dispatcher system. Think of it as a "do once" mechanism. + +```python3 +with transaction.atomic() + for t in tasks: + if condition: + ScheduleTaskManager.schedule() +``` + +In the above code, we only want to schedule the TaskManager once after all `tasks` have been processed. `ScheduleTaskManager.schedule()` will handle that logic correctly. + +### Timing out + +Because of the global lock of the each manager, only one manager can run at a time. If that manager gets stuck for whatever reason, it is important to kill it and let a new one take its place. As such, there is special code in the parent dispatcher process to SIGKILL any of the task system managers after a few minutes. + +There is an important side effect to this. Because the manager `schedule()` runs in a transaction, the next run will have re-process the same tasks again. This could lead a manager never being able to progress from one run to the next, as each time it times out. In this situation the task system is effectively stuck as new tasks cannot start. To mitigate this, each manager will check if is is about to hit the time out period and bail out early if so. This gives the manager enough time to commit the DB transaction, and the next manager cycle will be able to start with the next set of unprocessed tasks. This ensures that the system can still make incremental progress under high workloads (i.e. many pending tasks). ### Job Lifecycle | Job Status | State | -|:----------:|:------------------------------------------------------------------------------------------------------------------:| +|:-----------|:-------------------------------------------------------------------------------------------------------------------| | pending | Job has been launched.
1. Hasn't yet been seen by the scheduler
2. Is blocked by another task
3. Not enough capacity | -| waiting | Job published to an AMQP queue. +| waiting | Job submitted to dispatcher via pg_notify | running | Job is running on a AWX node. | successful | Job finished with `ansible-playbook` return code 0. | | failed | Job finished with `ansible-playbook` return code other than 0. | @@ -46,19 +149,20 @@ Empirically, the periodic task manager has been effective in the past and will c The Task Manager decides which exact node a job will run on. It does so by considering user-configured group execution policy and user-configured capacity. First, the set of groups on which a job _can_ run on is constructed (see the AWX document on [Clustering](https://github.com/ansible/awx/blob/devel/docs/clustering.md)). The groups are traversed until a node within that group is found. The node with the largest remaining capacity that is idle is chosen first. If there are no idle nodes, then the node with the largest remaining capacity greater than or equal to the job capacity requirements is chosen. -## Code Composition +## Managers are short-lived -The main goal of the new task manager is to run in our HA environment. This translates to making the task manager logic run on any AWX node. To support this, we need to remove any reliance on the state between task manager schedule logic runs. A future goal of AWX is to design the task manager to have limited/no access to the database for this feature. This secondary requirement, combined with performance needs, led to the creation of partial models that wrap dict database model data. +Manager instances are short lived. Each time it runs, a new instance of the manager class is created, relevant data is pulled in from database, and the manager processes the data. After running, the instance is cleaned up. ### Blocking Logic The blocking logic is handled by a mixture of ORM instance references and task manager local tracking data in the scheduler instance. +There is a distinction between so-called "hard" vs "soft" blocking. -## Acceptance Tests +**Hard blocking** refers to dependencies that are represented in the database via the task `dependent_jobs` field. That is, Job A will not run if any of its `dependent_jobs` are still running. -The new task manager should, in essence, work like the old one. Old task manager features were identified while new ones were discovered in the process of creating the new task manager. Rules for the new task manager behavior are iterated below; testing should ensure that those rules are followed. +**Soft blocking** refers to blocking logic that doesn't have a database representation. Imagine Job A and B are both based on the same job template, and concurrent jobs is `disabled`. Job B will be blocked from running if Job A is already running. This is determined purely by the task manager tracking running jobs via the Dependency Graph. ### Task Manager Rules From 76cecf3f6be4769ac4bfd4c105f337ffd5bc4336 Mon Sep 17 00:00:00 2001 From: Elijah DeLee Date: Wed, 7 Dec 2022 13:21:58 -0500 Subject: [PATCH 4/6] update capacity docs to cover hybrid node case this came up in conversation and I saw this was not in this doc as an example --- docs/capacity.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/capacity.md b/docs/capacity.md index 4249effae6..cce0007d25 100644 --- a/docs/capacity.md +++ b/docs/capacity.md @@ -98,6 +98,7 @@ Examples: Given settings.AWX_CONTROL_NODE_TASK_IMPACT is 1: - Project updates (where the execution_node is always the same as the controller_node), have a total impact of 2. - Container group jobs (where the execution node is not a member of the cluster) only control impact applies, and the controller node has a total task impact of 1. + - A job executing on a "hybrid" node where both control and execution will occur on the same node has the task impact of (1 overhead for ansible main process) + (min(forks,hosts)) + (1 control node task impact). Meaning a Job running on a hybrid node with forks set to 1 would have a total task impact of 3. ### Selecting the Right settings.AWX_CONTROL_NODE_TASK_IMPACT From 0b96d617ac41697b1cdeaf7c271dea47aa2be6b5 Mon Sep 17 00:00:00 2001 From: Rick Elrod Date: Tue, 13 Dec 2022 15:07:15 -0600 Subject: [PATCH 5/6] Fix BROADCAST_WEBSOCKET_PORT for Kube dev (#13243) - `settings/minikube.py` gets imported conditionally, when the environment variable `AWX_KUBE_DEVEL` is set. In this imported file, we set `BROADCAST_WEBSOCKET_PORT = 8013`, but 8013 is only used in the docker-compose dev environment. In Kubernetes environments, 8052 is used for everything. This is hardcoded awx-operator's ConfigMap. - Also rename `minikube.py` because it is used for every kind of development Kube environment, including Kind. Signed-off-by: Rick Elrod --- awx/settings/development.py | 2 +- awx/settings/{minikube.py => development_kube.py} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename awx/settings/{minikube.py => development_kube.py} (80%) diff --git a/awx/settings/development.py b/awx/settings/development.py index bd76582aa6..1be4b72956 100644 --- a/awx/settings/development.py +++ b/awx/settings/development.py @@ -114,7 +114,7 @@ if 'sqlite3' not in DATABASES['default']['ENGINE']: # noqa # this needs to stay at the bottom of this file try: if os.getenv('AWX_KUBE_DEVEL', False): - include(optional('minikube.py'), scope=locals()) + include(optional('development_kube.py'), scope=locals()) else: include(optional('local_*.py'), scope=locals()) except ImportError: diff --git a/awx/settings/minikube.py b/awx/settings/development_kube.py similarity index 80% rename from awx/settings/minikube.py rename to awx/settings/development_kube.py index 0ac81875bc..c30a7fe025 100644 --- a/awx/settings/minikube.py +++ b/awx/settings/development_kube.py @@ -1,4 +1,4 @@ BROADCAST_WEBSOCKET_SECRET = '🤖starscream🤖' -BROADCAST_WEBSOCKET_PORT = 8013 +BROADCAST_WEBSOCKET_PORT = 8052 BROADCAST_WEBSOCKET_VERIFY_CERT = False BROADCAST_WEBSOCKET_PROTOCOL = 'http' From aad1fbcef8f0d9d0424a9431d49a00552650bf44 Mon Sep 17 00:00:00 2001 From: akus062381 Date: Tue, 13 Dec 2022 16:17:42 -0500 Subject: [PATCH 6/6] add new triage reply --- .github/triage_replies.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/triage_replies.md b/.github/triage_replies.md index fea0a6f0aa..6df2046a2f 100644 --- a/.github/triage_replies.md +++ b/.github/triage_replies.md @@ -53,6 +53,16 @@ https://github.com/ansible/awx/#get-involved \ Thank you once again for this and your interest in AWX! +### Red Hat Support Team +- Hi! \ +\ +It appears that you are using an RPM build for RHEL. Please reach out to the Red Hat support team and submit a ticket. \ +\ +Here is the link to do so: \ +\ +https://access.redhat.com/support \ +\ +Thank you for your submission and for supporting AWX! ## Common