Add job lifecycle logging

Various	points (e.g. created, running, processing events), are
structured into	json format and	output to /var/log/tower/job_lifecycle.log

As part	of this	work, the DependencyGraph is reworked to return
which job object is doing the blocking, rather than a boolean.
This commit is contained in:
Seth Foster
2021-02-03 14:33:25 -05:00
parent 47de2ddcb5
commit 41d0a2f7b9
12 changed files with 191 additions and 91 deletions
+2
View File
@@ -828,6 +828,7 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
return self.inventory.hosts.only(*only)
def start_job_fact_cache(self, destination, modification_times, timeout=None):
self.log_lifecycle("start_job_fact_cache")
os.makedirs(destination, mode=0o700)
hosts = self._get_inventory_hosts()
if timeout is None:
@@ -852,6 +853,7 @@ class Job(UnifiedJob, JobOptions, SurveyJobMixin, JobNotificationMixin, TaskMana
modification_times[filepath] = os.path.getmtime(filepath)
def finish_job_fact_cache(self, destination, modification_times):
self.log_lifecycle("finish_job_fact_cache")
for host in self._get_inventory_hosts():
filepath = os.sep.join(map(str, [destination, host.name]))
if not os.path.realpath(filepath).startswith(destination):
+22 -3
View File
@@ -55,7 +55,7 @@ from awx.main.fields import JSONField, AskForField, OrderedManyToManyField
__all__ = ['UnifiedJobTemplate', 'UnifiedJob', 'StdoutMaxBytesExceeded']
logger = logging.getLogger('awx.main.models.unified_jobs')
logger_job_lifecycle = logging.getLogger('awx.analytics.job_lifecycle')
# NOTE: ACTIVE_STATES moved to constants because it is used by parent modules
@@ -420,7 +420,7 @@ class UnifiedJobTemplate(PolymorphicModel, CommonModelNameNotUnique, Notificatio
# have been associated to the UJ
if unified_job.__class__ in activity_stream_registrar.models:
activity_stream_create(None, unified_job, True)
unified_job.log_lifecycle("created")
return unified_job
@classmethod
@@ -862,7 +862,7 @@ class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique
self.unified_job_template = self._get_parent_instance()
if 'unified_job_template' not in update_fields:
update_fields.append('unified_job_template')
if self.cancel_flag and not self.canceled_on:
# Record the 'canceled' time.
self.canceled_on = now()
@@ -1010,6 +1010,7 @@ class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique
event_qs = self.get_event_queryset()
except NotImplementedError:
return True # Model without events, such as WFJT
self.log_lifecycle("event_processing_finished")
return self.emitted_events == event_qs.count()
def result_stdout_raw_handle(self, enforce_max_bytes=True):
@@ -1318,6 +1319,10 @@ class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique
if 'extra_vars' in kwargs:
self.handle_extra_data(kwargs['extra_vars'])
# remove any job_explanations that may have been set while job was in pending
if self.job_explanation != "":
self.job_explanation = ""
return (True, opts)
def signal_start(self, **kwargs):
@@ -1484,3 +1489,17 @@ class UnifiedJob(PolymorphicModel, PasswordFieldsModel, CommonModelNameNotUnique
@property
def is_containerized(self):
return False
def log_lifecycle(self, state, blocked_by=None):
extra={'type': self._meta.model_name,
'task_id': self.id,
'state': state}
if self.unified_job_template:
extra["template_name"] = self.unified_job_template.name
if state == "blocked" and blocked_by:
blocked_by_msg = f"{blocked_by._meta.model_name}-{blocked_by.id}"
msg = f"{self._meta.model_name}-{self.id} blocked by {blocked_by_msg}"
extra["blocked_by"] = blocked_by_msg
else:
msg = f"{self._meta.model_name}-{self.id} {state.replace('_', ' ')}"
logger_job_lifecycle.debug(msg, extra=extra)