mirror of
https://github.com/ZwareBear/awx.git
synced 2026-04-11 10:31:48 -05:00
Instance has gone lost, and jobs are still either running or waiting inside of its instance group. RBAC - user does not have permission to see some of the groups that would be used in the capacity calculation. For some cases, a naive capacity dictionary is returned; the main goal is to not throw errors and to avoid unpredictable behavior. Detailed capacity tests are moved into a new unit test file.
195 lines
8.1 KiB
Python
195 lines
8.1 KiB
Python
# Copyright (c) 2015 Ansible, Inc.
|
|
# All Rights Reserved.
|
|
|
|
import sys
|
|
from datetime import timedelta
|
|
import logging
|
|
|
|
from django.db import models
|
|
from django.utils.timezone import now
|
|
from django.db.models import Sum
|
|
from django.conf import settings
|
|
|
|
from awx.main.utils.filters import SmartFilter
|
|
|
|
# Public API of this module. The name must be spelled with exactly two leading
# underscores for Python to honor it during `from ... import *`.
__all__ = ['HostManager', 'InstanceManager', 'InstanceGroupManager']

# Module-level logger shared by the managers defined in this file.
logger = logging.getLogger('awx.main.managers')
|
|
|
|
|
|
class HostManager(models.Manager):
    """Custom manager class for Hosts model."""

    def active_count(self):
        """Return count of active, unique hosts for licensing.

        Hosts are de-duplicated by name.
        """
        try:
            distinct_by_name = self.order_by('name').distinct('name')
            return distinct_by_name.count()
        except NotImplementedError:  # For unit tests only, SQLite doesn't support distinct('name')
            unique_names = set(self.values_list('name', flat=True))
            return len(unique_names)

    def get_queryset(self):
        """Return the host queryset, applying the parent's smart filter if any.

        When this manager is attached to a parent `instance` that has
        `kind == 'smart'` and a non-None `host_filter`, the filter is turned
        into a query via SmartFilter and combined with the default queryset,
        keeping one host per distinct name. In every other case the default
        queryset is returned untouched.
        """
        qs = super(HostManager, self).get_queryset()

        # Guard clauses: anything short of a fully-described smart parent
        # falls back to the plain queryset.
        if not hasattr(self, 'instance'):
            return qs
        parent = self.instance
        if not (hasattr(parent, 'host_filter') and hasattr(parent, 'kind')):
            return qs
        if parent.kind != 'smart' or parent.host_filter is None:
            return qs

        smart_query = SmartFilter.query_from_string(parent.host_filter)

        # If we are using host_filters, disable the core_filters, this allows
        # us to access all of the available Host entries, not just the ones associated
        # with a specific FK/relation.
        #
        # If we don't disable this, a filter of {'inventory': self.instance} gets automatically
        # injected by the related object mapper.
        self.core_filters = {}

        matching = qs & smart_query
        # One row per host name; ordering by pk makes the pick among
        # same-named hosts deterministic.
        first_per_name = matching.order_by('name', 'pk').distinct('name')
        return matching.filter(pk__in=first_per_name)
|
|
|
|
|
|
def get_ig_ig_mapping(ig_instance_mapping, instance_ig_mapping):
    """Map each instance group to the set of groups it shares instances with.

    For every group, the result is the union of all groups that its
    instances are members of. The group itself is always part of its own
    set, so a group with no instances maps to just ``{group_name}``.

    :param ig_instance_mapping: dict of group name -> iterable of instance
        hostnames that belong to the group.
    :param instance_ig_mapping: dict of instance hostname -> set of group
        names the instance belongs to.
    :returns: dict of group name -> set of group names.
    :raises KeyError: if a hostname listed in ``ig_instance_mapping`` is
        missing from ``instance_ig_mapping``.
    """
    ig_ig_mapping = {}
    for group_name, hostnames in ig_instance_mapping.items():
        # Seed with the group itself so groups without instances still
        # report themselves.
        ig_ig_set = {group_name}
        for instance_hostname in hostnames:
            ig_ig_set |= instance_ig_mapping[instance_hostname]
        ig_ig_mapping[group_name] = ig_ig_set
    return ig_ig_mapping
|
|
|
|
|
|
class InstanceManager(models.Manager):
    """A custom manager class for the Instance model.

    Provides "table-level" methods including getting the currently active
    instance or role.
    """

    def me(self):
        """Return the currently active instance.

        :raises RuntimeError: if no instance has a hostname equal to
            ``settings.CLUSTER_HOST_ID``.
        """
        # If we are running unit tests, return a stub record.
        if settings.IS_TESTING(sys.argv):
            return self.model(id=1,
                              hostname='localhost',
                              uuid='00000000-0000-0000-0000-000000000000')

        node = self.filter(hostname=settings.CLUSTER_HOST_ID)
        if node.exists():
            return node[0]
        raise RuntimeError("No instance found with the current cluster host id")

    def active_count(self):
        """Return count of active Tower nodes for licensing."""
        return self.all().count()

    def total_capacity(self):
        """Return the summed capacity of recently-modified instances.

        Only instances whose ``modified`` timestamp is within the last
        ``settings.AWX_ACTIVE_NODE_TIME`` seconds are counted. The result is
        never lower than 50.
        """
        cutoff = now() - timedelta(seconds=settings.AWX_ACTIVE_NODE_TIME)
        sumval = self.filter(modified__gte=cutoff) \
                     .aggregate(total_capacity=Sum('capacity'))['total_capacity']
        # Sum() aggregates to None when no rows match; comparing None with an
        # int raises TypeError on Python 3, so treat "no rows" as zero.
        return max(50, sumval or 0)

    def my_role(self):
        """Return the role of this node; currently always "tower"."""
        # NOTE: TODO: Likely to repurpose this once standalone ramparts are a thing
        return "tower"
|
|
|
|
|
|
class InstanceGroupManager(models.Manager):
    """A custom manager class for instance groups.

    Used for global capacity calculations
    """

    def capacity_mapping(self, qs=None):
        """Build the membership maps used by the capacity calculation.

        Instances whose ``capacity`` equals 0 are ignored.

        :param qs: optional iterable of groups; defaults to all groups with
            their ``instances`` prefetched.
        :returns: tuple ``(instance_ig_mapping, ig_ig_mapping)`` where
            ``instance_ig_mapping`` maps instance hostname -> set of group
            names containing it, and ``ig_ig_mapping`` maps group name ->
            set of group names as computed by ``get_ig_ig_mapping``.
        """
        if qs is None:
            qs = self.all().prefetch_related('instances')
        instance_ig_mapping = {}
        ig_instance_mapping = {}
        # Create dictionaries that represent basic m2m memberships
        for group in qs:
            hostnames = set()
            # Single pass over the group's instances fills both directions.
            for inst in group.instances.all():
                if inst.capacity == 0:
                    continue
                hostnames.add(inst.hostname)
                instance_ig_mapping.setdefault(inst.hostname, set()).add(group.name)
            ig_instance_mapping[group.name] = hostnames
        # Get IG capacity overlap mapping
        ig_ig_mapping = get_ig_ig_mapping(ig_instance_mapping, instance_ig_mapping)

        return instance_ig_mapping, ig_ig_mapping

    @staticmethod
    def zero_out_group(graph, name, breakdown):
        """Reset the capacity counters of group ``name`` inside ``graph``.

        Creates the entry if missing. ``consumed_capacity`` is always reset;
        ``committed_capacity`` and ``running_capacity`` only when
        ``breakdown`` is truthy.
        """
        if name not in graph:
            graph[name] = {}
        graph[name]['consumed_capacity'] = 0
        if breakdown:
            graph[name]['committed_capacity'] = 0
            graph[name]['running_capacity'] = 0

    def _charge_groups(self, graph, group_names, impact, breakdown, breakdown_key):
        """Add ``impact`` to each named group in ``graph``.

        Groups not yet present in ``graph`` are created zeroed first. When
        ``breakdown`` is truthy the impact is also added under
        ``breakdown_key``.
        """
        for group_name in group_names:
            if group_name not in graph:
                self.zero_out_group(graph, group_name, breakdown)
            graph[group_name]['consumed_capacity'] += impact
            if breakdown:
                graph[group_name][breakdown_key] += impact

    def capacity_values(self, qs=None, tasks=None, breakdown=False, graph=None):
        """Returns a dictionary of capacity values for all IGs

        :param qs: optional iterable of groups; defaults to all groups with
            their ``instances`` prefetched.
        :param tasks: optional iterable of tasks to account for; defaults to
            the related unified jobs whose status is 'running' or 'waiting'.
        :param breakdown: when truthy, also track ``committed_capacity``
            and ``running_capacity`` per group.
        :param graph: optional dict to fill in; every group already in it is
            zeroed before tasks are counted. Defaults to one entry per group
            in ``qs``.
        :returns: ``graph``, mapping group name -> dict of capacity counters.
        """
        if qs is None:  # Optionally BYOQS - bring your own queryset
            qs = self.all().prefetch_related('instances')
        instance_ig_mapping, ig_ig_mapping = self.capacity_mapping(qs=qs)

        if tasks is None:
            tasks = self.model.unifiedjob_set.related.related_model.objects.filter(
                status__in=('running', 'waiting'))

        if graph is None:
            graph = {group.name: {} for group in qs}
        for group_name in graph:
            self.zero_out_group(graph, group_name, breakdown)
        for t in tasks:
            # TODO: dock capacity for isolated job management tasks running in queue
            impact = t.task_impact
            if t.status == 'waiting' or not t.execution_node:
                # Subtract capacity from any peer groups that share instances
                if not t.instance_group:
                    logger.warning('Excluded %s from capacity algorithm '
                                   '(missing instance_group).', t.log_format)
                    impacted_groups = []
                elif t.instance_group.name not in ig_ig_mapping:
                    # Waiting job in group with 0 capacity has no collateral impact
                    impacted_groups = [t.instance_group.name]
                else:
                    impacted_groups = ig_ig_mapping[t.instance_group.name]
                self._charge_groups(graph, impacted_groups, impact,
                                    breakdown, 'committed_capacity')
            elif t.status == 'running':
                # Subtract capacity from all groups that contain the instance
                if t.execution_node not in instance_ig_mapping:
                    logger.warning('Detected %s running inside lost instance, '
                                   'may still be waiting for reaper.', t.log_format)
                    # Fall back to charging only the task's own group, if any.
                    if t.instance_group:
                        impacted_groups = [t.instance_group.name]
                    else:
                        impacted_groups = []
                else:
                    impacted_groups = instance_ig_mapping[t.execution_node]
                self._charge_groups(graph, impacted_groups, impact,
                                    breakdown, 'running_capacity')
            else:
                logger.error('Programming error, %s not in ["running", "waiting"]', t.log_format)
        return graph
|