add settings to define task manager timeout

Also have a seperate setting for a grace period beyond the timeout. This
is so in the future, we can exit after the timeout but before the grace
period.
This commit is contained in:
Elijah DeLee
2022-07-06 23:11:49 -04:00
parent 62fc3994fb
commit 06883dc882
2 changed files with 7 additions and 2 deletions

View File

@@ -406,8 +406,8 @@ class AutoscalePool(WorkerPool):
w.managed_tasks[current_task['uuid']]['started'] = time.time()
age = time.time() - current_task['started']
w.managed_tasks[current_task['uuid']]['age'] = age
if age > (60 * 5):
logger.error(f'run_task_manager has held the advisory lock for >5m, sending SIGTERM to {w.pid}') # noqa
if age > (settings.TASK_MANAGER_TIMEOUT + settings.TASK_MANAGER_TIMEOUT_GRACE_PERIOD):
logger.error(f'run_task_manager has held the advisory lock for {age}, sending SIGTERM to {w.pid}') # noqa
os.kill(w.pid, signal.SIGTERM)
for m in orphaned:

View File

@@ -248,6 +248,11 @@ SUBSYSTEM_METRICS_TASK_MANAGER_RECORD_INTERVAL = 15
# The maximum allowed jobs to start on a given task manager cycle
START_TASK_LIMIT = 100
# Time out task managers if they take longer than this many seconds, plus TASK_MANAGER_TIMEOUT_GRACE_PERIOD
# We have the grace period so the task manager can bail out before the timeout.
TASK_MANAGER_TIMEOUT = 300
TASK_MANAGER_TIMEOUT_GRACE_PERIOD = 60
# Disallow sending session cookies over insecure connections
SESSION_COOKIE_SECURE = True