# Copyright (c) 2015 Ansible, Inc.
# All Rights Reserved.

from decimal import Decimal
import logging
import os

from django.core.validators import MinValueValidator, MaxValueValidator
from django.db import models, connection
from django.db.models.signals import post_save, post_delete
from django.dispatch import receiver
from django.utils.translation import gettext_lazy as _
from django.conf import settings
from django.utils.timezone import now, timedelta
from django.db.models import Sum

import redis
from solo.models import SingletonModel

from awx import __version__ as awx_application_version
from awx.api.versioning import reverse
from awx.main.fields import JSONBlob
from awx.main.managers import InstanceManager, UUID_DEFAULT
from awx.main.constants import JOB_FOLDER_PREFIX
from awx.main.models.base import BaseModel, HasEditsMixin, prevent_search
from awx.main.models.unified_jobs import UnifiedJob
from awx.main.utils.common import get_corrected_cpu, get_cpu_effective_capacity, get_corrected_memory, get_mem_effective_capacity
from awx.main.models.mixins import RelatedJobsMixin

# ansible-runner
from ansible_runner.utils.capacity import get_cpu_count, get_mem_in_bytes

__all__ = ('Instance', 'InstanceGroup', 'InstanceLink', 'TowerScheduleState')

logger = logging.getLogger('awx.main.models.ha')


class HasPolicyEditsMixin(HasEditsMixin):
    """Abstract mixin that tracks whether a model's policy-related fields were edited.

    Concrete models must define a ``POLICY_FIELDS`` attribute naming the fields
    that ``has_policy_changes`` should compare against the last snapshot.
    """

    class Meta:
        abstract = True

    def __init__(self, *args, **kwargs):
        """Initialize the model and record a snapshot of its field values."""
        # super(BaseModel, self) resumes method resolution *after* BaseModel in the MRO.
        init_result = super(BaseModel, self).__init__(*args, **kwargs)
        self._prior_values_store = self._get_fields_snapshot()
        return init_result

    def save(self, *args, **kwargs):
        """Save the model, then refresh the stored snapshot of field values."""
        # Same MRO starting point as __init__: lookup begins after BaseModel.
        super(BaseModel, self).save(*args, **kwargs)
        self._prior_values_store = self._get_fields_snapshot()

    def has_policy_changes(self):
        """Return whether any field listed in ``POLICY_FIELDS`` has edits.

        Raises:
            RuntimeError: if the model does not define ``POLICY_FIELDS``.
        """
        if hasattr(self, 'POLICY_FIELDS'):
            current_values = self._get_fields_snapshot(fields_set=self.POLICY_FIELDS)
            return self._values_have_edits(current_values)
        raise RuntimeError('HasPolicyEditsMixin Model needs to set POLICY_FIELDS')


class InstanceLink(BaseModel):
    """Directed link between two ``Instance`` rows (``source`` -> ``target``) with a life cycle state."""

    class States(models.TextChoices):
        ADDING = 'adding', _('Adding')
        ESTABLISHED = 'established', _('Established')
        REMOVING = 'removing', _('Removing')

    class Meta:
        # A given (source, target) pair may only be linked once.
        unique_together = ('source', 'target')

    # related_name='+' means no reverse accessor is created on Instance for this side.
    source = models.ForeignKey(
        'Instance',
        on_delete=models.CASCADE,
        related_name='+',
    )
    # The reverse accessor on Instance for this side is ``reverse_peers``.
    target = models.ForeignKey(
        'Instance',
        on_delete=models.CASCADE,
        related_name='reverse_peers',
    )
    link_state = models.CharField(
        choices=States.choices,
        default=States.ESTABLISHED,
        max_length=16,
        help_text=_("Indicates the current life cycle stage of this peer link."),
    )


class Instance(HasPolicyEditsMixin, BaseModel):
    """A model representing an AWX instance running against this database."""

    objects = InstanceManager()

    # Fields set in instance registration
    uuid = models.CharField(max_length=40, default=UUID_DEFAULT)
    hostname = models.CharField(max_length=250, unique=True)
    ip_address = models.CharField(
        blank=True,
        null=True,
        default=None,
        max_length=50,
        unique=True,
    )
    # Auto-fields, implementation is different from BaseModel
    created = models.DateTimeField(auto_now_add=True)
    modified = models.DateTimeField(auto_now=True)
    # Fields defined in health check or heartbeat
    version = models.CharField(max_length=120, blank=True)
    cpu = models.DecimalField(
        default=Decimal(0.0),
        max_digits=4,
        decimal_places=1,
        editable=False,
    )
    memory = models.BigIntegerField(
        default=0,
        editable=False,
        help_text=_('Total system memory of this instance in bytes.'),
    )
    errors = models.TextField(
        default='',
        blank=True,
        editable=False,
        help_text=_('Any error details from the last health check.'),
    )
    last_seen = models.DateTimeField(
        null=True,
        editable=False,
        help_text=_('Last time instance ran its heartbeat task for main cluster nodes. Last known connection to receptor mesh for execution nodes.'),
    )
    last_health_check = models.DateTimeField(
        null=True,
        editable=False,
        help_text=_('Last time a health check was ran on this instance to refresh cpu, memory, and capacity.'),
    )
    # Capacity management
    capacity = models.PositiveIntegerField(
        default=100,
        editable=False,
    )
    capacity_adjustment = models.DecimalField(default=Decimal(1.0), max_digits=3, decimal_places=2, validators=[MinValueValidator(0)])
    enabled = models.BooleanField(default=True)
    managed_by_policy = models.BooleanField(default=True)

    cpu_capacity = models.IntegerField(
        default=0,
        editable=False,
    )
    mem_capacity = models.IntegerField(
        default=0,
        editable=False,
    )

    class Types(models.TextChoices):
        CONTROL = 'control', _("Control plane node")
        EXECUTION = 'execution', _("Execution plane node")
        HYBRID = 'hybrid', _("Controller and execution")
        HOP = 'hop', _("Message-passing node, no execution capability")

    node_type = models.CharField(default=Types.HYBRID, choices=Types.choices, max_length=16, help_text=_("Role that this node plays in the mesh."))

    class States(models.TextChoices):
        PROVISIONING = 'provisioning', _('Provisioning')
        PROVISION_FAIL = 'provision-fail', _('Provisioning Failure')
        INSTALLED = 'installed', _('Installed')
        READY = 'ready', _('Ready')
        UNAVAILABLE = 'unavailable', _('Unavailable')
        DEPROVISIONING = 'deprovisioning', _('De-provisioning')
        DEPROVISION_FAIL = 'deprovision-fail', _('De-provisioning Failure')

    node_state = models.CharField(
        choices=States.choices, default=States.READY, max_length=16, help_text=_("Indicates the current life cycle stage of this instance.")
    )
    listener_port = models.PositiveIntegerField(
        blank=True,
        default=27199,
        validators=[MinValueValidator(1), MaxValueValidator(65535)],
        help_text=_("Port that Receptor will listen for incoming connections on."),
    )

    peers = models.ManyToManyField('self', symmetrical=False, through=InstanceLink, through_fields=('source', 'target'))

    class Meta:
        app_label = 'main'
        ordering = ("hostname",)

    # Fields whose edits are detected by HasPolicyEditsMixin.has_policy_changes()
    POLICY_FIELDS = frozenset(('managed_by_policy', 'hostname', 'capacity_adjustment'))

    def get_absolute_url(self, request=None):
        """Return the API detail URL for this instance."""
        return reverse('api:instance_detail', kwargs={'pk': self.pk}, request=request)

    @property
    def consumed_capacity(self):
        """Capacity currently used by running/waiting jobs associated with this node.

        Nodes that execute (hybrid, execution) are charged the summed ``task_impact``
        of jobs whose ``execution_node`` is this hostname. Nodes that control
        (hybrid, control) are charged ``AWX_CONTROL_NODE_TASK_IMPACT`` for each job
        whose ``controller_node`` is this hostname.
        """
        capacity_consumed = 0
        if self.node_type in ('hybrid', 'execution'):
            capacity_consumed += (
                UnifiedJob.objects.filter(execution_node=self.hostname, status__in=('running', 'waiting')).aggregate(Sum("task_impact"))["task_impact__sum"]
                or 0  # the aggregate is None when no rows match
            )
        if self.node_type in ('hybrid', 'control'):
            capacity_consumed += (
                settings.AWX_CONTROL_NODE_TASK_IMPACT * UnifiedJob.objects.filter(controller_node=self.hostname, status__in=('running', 'waiting')).count()
            )
        return capacity_consumed

    @property
    def remaining_capacity(self):
        """Configured capacity minus the currently consumed capacity."""
        return self.capacity - self.consumed_capacity

    @property
    def jobs_running(self):
        """Number of running or waiting jobs whose execution node is this instance."""
        return UnifiedJob.objects.filter(
            execution_node=self.hostname,
            status__in=(
                'running',
                'waiting',
            ),
        ).count()

    @property
    def jobs_total(self):
        """Number of jobs, in any status, whose execution node is this instance."""
        return UnifiedJob.objects.filter(execution_node=self.hostname).count()

    def get_cleanup_task_kwargs(self, **kwargs):
        """
        Produce options to use for the command: ansible-runner worker cleanup
        returns a dict that is passed to the python interface for the runner method corresponding to that command
        any kwargs will override that key=value combination in the returned dict
        """
        vargs = dict()
        if settings.AWX_CLEANUP_PATHS:
            vargs['file_pattern'] = os.path.join(settings.AWX_ISOLATION_BASE_PATH, JOB_FOLDER_PREFIX % '*') + '*'
        vargs.update(kwargs)
        if not isinstance(vargs.get('grace_period'), int):
            vargs['grace_period'] = 60  # grace period of 60 minutes, need to set because CLI default will not take effect
        if 'exclude_strings' not in vargs and vargs.get('file_pattern'):
            # Protect the folders of jobs that are still active on this node from cleanup
            active_pks = list(
                UnifiedJob.objects.filter(
                    (models.Q(execution_node=self.hostname) | models.Q(controller_node=self.hostname)) & models.Q(status__in=('running', 'waiting'))
                ).values_list('pk', flat=True)
            )
            if active_pks:
                vargs['exclude_strings'] = [JOB_FOLDER_PREFIX % job_id for job_id in active_pks]
        if 'remove_images' in vargs or 'image_prune' in vargs:
            vargs.setdefault('process_isolation_executable', 'podman')
        return vargs

    def is_lost(self, ref_time=None):
        """Return True if the instance has not been seen within its grace period.

        The grace period is the heartbeat period times the missed-heartbeat tolerance,
        extended by the receptor advertisement period for execution and hop nodes.
        An instance that has never been seen is always considered lost.
        ``ref_time`` defaults to the current time.
        """
        if self.last_seen is None:
            return True
        if ref_time is None:
            ref_time = now()
        grace_period = settings.CLUSTER_NODE_HEARTBEAT_PERIOD * settings.CLUSTER_NODE_MISSED_HEARTBEAT_TOLERANCE
        if self.node_type in ('execution', 'hop'):
            grace_period += settings.RECEPTOR_SERVICE_ADVERTISEMENT_PERIOD
        return self.last_seen < ref_time - timedelta(seconds=grace_period)

    def mark_offline(self, update_last_seen=False, perform_save=True, errors=''):
        """Move the instance to the UNAVAILABLE state with zero capacity.

        Does nothing unless the node is currently READY, UNAVAILABLE or INSTALLED,
        and also does nothing when it is already UNAVAILABLE with the same errors
        and no ``last_seen`` update was requested. When ``perform_save`` is False the
        caller is responsible for persisting the modified fields
        (node_state, capacity, cpu_capacity, mem_capacity, errors and optionally last_seen).
        """
        if self.node_state not in (Instance.States.READY, Instance.States.UNAVAILABLE, Instance.States.INSTALLED):
            return
        if self.node_state == Instance.States.UNAVAILABLE and self.errors == errors and (not update_last_seen):
            return
        self.node_state = Instance.States.UNAVAILABLE
        self.cpu_capacity = self.mem_capacity = self.capacity = 0
        self.errors = errors
        if update_last_seen:
            self.last_seen = now()

        if perform_save:
            update_fields = ['node_state', 'capacity', 'cpu_capacity', 'mem_capacity', 'errors']
            if update_last_seen:
                update_fields += ['last_seen']
            self.save(update_fields=update_fields)

    def set_capacity_value(self):
        """Sets capacity according to capacity adjustment rule (no save)"""
        if self.enabled and self.node_type != 'hop':
            # Interpolate between the smaller and larger of the two capacities
            lower_cap = min(self.mem_capacity, self.cpu_capacity)
            higher_cap = max(self.mem_capacity, self.cpu_capacity)
            self.capacity = lower_cap + (higher_cap - lower_cap) * self.capacity_adjustment
        else:
            self.capacity = 0

    def refresh_capacity_fields(self):
        """Update derived capacity fields from cpu and memory (no save)"""
        if self.node_type == 'hop':
            self.cpu_capacity = 0
            self.mem_capacity = 0  # formula has a non-zero offset, so we make sure it is 0 for hop nodes
        else:
            self.cpu_capacity = get_cpu_effective_capacity(self.cpu)
            self.mem_capacity = get_mem_effective_capacity(self.memory)
        self.set_capacity_value()

    def save_health_data(self, version=None, cpu=0, memory=0, uuid=None, update_last_seen=False, errors=''):
        """Record the outcome of a health check and persist only the affected fields.

        With no ``errors`` the capacity fields are recomputed and an UNAVAILABLE or
        INSTALLED node is promoted to READY. With ``errors`` the node is marked
        offline via ``mark_offline``. ``last_health_check`` is refreshed for every
        node type except hop; ``last_seen`` is refreshed only when
        ``update_last_seen`` is True.
        """
        current_time = now()
        update_fields = ['errors']
        if self.node_type != 'hop':
            self.last_health_check = current_time
            update_fields.append('last_health_check')

        if update_last_seen:
            # Use the current time rather than copying last_health_check: that field is
            # not refreshed for hop nodes, so copying it would store a stale (or null) value.
            self.last_seen = current_time
            update_fields.append('last_seen')

        if uuid is not None and self.uuid != uuid:
            if self.uuid is not None:
                logger.warning(f'Self-reported uuid of {self.hostname} changed from {self.uuid} to {uuid}')
            self.uuid = uuid
            update_fields.append('uuid')

        if version is not None and self.version != version:
            self.version = version
            update_fields.append('version')

        new_cpu = get_corrected_cpu(cpu)
        if new_cpu != self.cpu:
            self.cpu = new_cpu
            update_fields.append('cpu')

        new_memory = get_corrected_memory(memory)
        if new_memory != self.memory:
            self.memory = new_memory
            update_fields.append('memory')

        if not errors:
            self.refresh_capacity_fields()
            self.errors = ''
            if self.node_state in (Instance.States.UNAVAILABLE, Instance.States.INSTALLED):
                self.node_state = Instance.States.READY
                update_fields.append('node_state')
        else:
            prior_state = self.node_state
            self.mark_offline(perform_save=False, errors=errors)
            # mark_offline may have switched the node to UNAVAILABLE; that change must be
            # listed in update_fields or save() would silently drop it.
            if self.node_state != prior_state:
                update_fields.append('node_state')
        update_fields.extend(['cpu_capacity', 'mem_capacity', 'capacity'])

        # disabling activity stream will avoid extra queries, which is important for heartbeat actions
        from awx.main.signals import disable_activity_stream

        with disable_activity_stream():
            self.save(update_fields=update_fields)

    def local_health_check(self):
        """Only call this method on the instance that this record represents"""
        errors = None
        try:
            # if redis is down for some reason, that means we can't persist
            # playbook event data; we should consider this a zero capacity event
            redis.Redis.from_url(settings.BROKER_URL).ping()
        except redis.ConnectionError:
            errors = _('Failed to connect to Redis')

        self.save_health_data(awx_application_version, get_cpu_count(), get_mem_in_bytes(), update_last_seen=True, errors=errors)


class InstanceGroup(HasPolicyEditsMixin, BaseModel, RelatedJobsMixin):
    """A model representing a Queue/Group of AWX Instances."""

    class Meta:
        app_label = 'main'

    name = models.CharField(max_length=250, unique=True)
    created = models.DateTimeField(auto_now_add=True)
    modified = models.DateTimeField(auto_now=True)
    instances = models.ManyToManyField(
        'Instance',
        related_name='rampart_groups',
        editable=False,
        help_text=_('Instances that are members of this InstanceGroup'),
    )
    is_container_group = models.BooleanField(default=False)
    credential = models.ForeignKey(
        'Credential',
        related_name='%(class)ss',
        blank=True,
        null=True,
        default=None,
        on_delete=models.SET_NULL,
    )
    pod_spec_override = prevent_search(models.TextField(blank=True, default=''))
    policy_instance_percentage = models.IntegerField(
        default=0,
        help_text=_("Percentage of Instances to automatically assign to this group"),
    )
    policy_instance_minimum = models.IntegerField(
        default=0,
        help_text=_("Static minimum number of Instances to automatically assign to this group"),
    )
    policy_instance_list = JSONBlob(
        default=list,
        blank=True,
        help_text=_("List of exact-match Instances that will always be automatically assigned to this group"),
    )

    # Fields whose edits are detected by HasPolicyEditsMixin.has_policy_changes()
    POLICY_FIELDS = frozenset(('policy_instance_list', 'policy_instance_minimum', 'policy_instance_percentage'))

    def get_absolute_url(self, request=None):
        """Return the API detail URL for this instance group."""
        url_kwargs = {'pk': self.pk}
        return reverse('api:instance_group_detail', kwargs=url_kwargs, request=request)

    @property
    def capacity(self):
        """Total capacity of all member instances."""
        total = 0
        for member in self.instances.all():
            total += member.capacity
        return total

    @property
    def jobs_running(self):
        """Number of running or waiting jobs assigned to this group."""
        active_states = ('running', 'waiting')
        active_jobs = UnifiedJob.objects.filter(status__in=active_states, instance_group=self)
        return active_jobs.count()

    @property
    def jobs_total(self):
        """Number of jobs, in any status, assigned to this group."""
        all_jobs = UnifiedJob.objects.filter(instance_group=self)
        return all_jobs.count()

    # --- RelatedJobsMixin ---

    def _get_related_jobs(self):
        """Return the queryset of jobs assigned to this group."""
        return UnifiedJob.objects.filter(instance_group=self)

    def set_default_policy_fields(self):
        """Reset the policy fields on this object to their empty defaults (no save)."""
        self.policy_instance_list = []
        self.policy_instance_minimum = 0
        self.policy_instance_percentage = 0


class TowerScheduleState(SingletonModel):
    """Model holding the ``schedule_last_run`` timestamp, built on ``solo``'s ``SingletonModel``."""

    # Populated automatically when the row is first created (auto_now_add=True).
    schedule_last_run = models.DateTimeField(auto_now_add=True)


def schedule_policy_task():
    """Register ``apply_cluster_membership_policies`` to be dispatched when the current transaction commits."""
    # Imported inside the function rather than at module level.
    from awx.main.tasks.system import apply_cluster_membership_policies

    def _dispatch_policy_task():
        return apply_cluster_membership_policies.apply_async()

    connection.on_commit(_dispatch_policy_task)


@receiver(post_save, sender=InstanceGroup)
def on_instance_group_saved(sender, instance, created=False, raw=False, **kwargs):
    """post_save handler for InstanceGroup.

    A new group, or one whose policy fields changed, schedules the policy task
    unless it is a container group. Otherwise, a container group has its policy
    fields reset to their defaults on the in-memory object.
    """
    policy_relevant = created or instance.has_policy_changes()
    if policy_relevant:
        if instance.is_container_group:
            return
        schedule_policy_task()
        return
    # Reaching this point implies ``created`` is False, so only the
    # container-group flag decides whether the policy fields are reset.
    if instance.is_container_group:
        instance.set_default_policy_fields()


@receiver(post_save, sender=Instance)
def on_instance_saved(sender, instance, created=False, raw=False, **kwargs):
    """post_save handler for Instance.

    On K8S, a newly created execution or hop node triggers a broadcast that
    rewrites the receptor config once the transaction commits. A new instance,
    or one whose policy fields changed, also schedules the policy task.
    """
    # TODO: handle update to instance
    needs_receptor_refresh = settings.IS_K8S and created and instance.node_type in ('execution', 'hop')
    if needs_receptor_refresh:
        from awx.main.tasks.receptor import write_receptor_config  # prevents circular import

        def _broadcast_receptor_config():
            return write_receptor_config.apply_async(queue='tower_broadcast_all')

        # on commit broadcast to all control instance to update their receptor configs
        connection.on_commit(_broadcast_receptor_config)

    if not created and not instance.has_policy_changes():
        return
    schedule_policy_task()


@receiver(post_delete, sender=InstanceGroup)
def on_instance_group_deleted(sender, instance, using, **kwargs):
    """post_delete handler for InstanceGroup: schedule the policy task unless a container group was removed."""
    if instance.is_container_group:
        return
    schedule_policy_task()


@receiver(post_delete, sender=Instance)
def on_instance_deleted(sender, instance, using, **kwargs):
    """post_delete handler for Instance: always schedule the policy task."""
    schedule_policy_task()


class UnifiedJobTemplateInstanceGroupMembership(models.Model):
    """Row associating a UnifiedJobTemplate with an InstanceGroup, with an optional position."""

    unifiedjobtemplate = models.ForeignKey('UnifiedJobTemplate', on_delete=models.CASCADE)
    instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
    # Nullable, indexed integer; presumably the ordering of the group within the association.
    position = models.PositiveIntegerField(null=True, default=None, db_index=True)


class OrganizationInstanceGroupMembership(models.Model):
    """Row associating an Organization with an InstanceGroup, with an optional position."""

    organization = models.ForeignKey('Organization', on_delete=models.CASCADE)
    instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
    # Nullable, indexed integer; presumably the ordering of the group within the association.
    position = models.PositiveIntegerField(null=True, default=None, db_index=True)


class InventoryInstanceGroupMembership(models.Model):
    """Row associating an Inventory with an InstanceGroup, with an optional position."""

    inventory = models.ForeignKey('Inventory', on_delete=models.CASCADE)
    instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
    # Nullable, indexed integer; presumably the ordering of the group within the association.
    position = models.PositiveIntegerField(null=True, default=None, db_index=True)


class JobLaunchConfigInstanceGroupMembership(models.Model):
    """Row associating a JobLaunchConfig with an InstanceGroup, with an optional position."""

    joblaunchconfig = models.ForeignKey('JobLaunchConfig', on_delete=models.CASCADE)
    instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
    # Nullable, indexed integer; presumably the ordering of the group within the association.
    position = models.PositiveIntegerField(null=True, default=None, db_index=True)


class ScheduleInstanceGroupMembership(models.Model):
    """Row associating a Schedule with an InstanceGroup, with an optional position."""

    schedule = models.ForeignKey('Schedule', on_delete=models.CASCADE)
    instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
    # Nullable, indexed integer; presumably the ordering of the group within the association.
    position = models.PositiveIntegerField(null=True, default=None, db_index=True)


class WorkflowJobTemplateNodeBaseInstanceGroupMembership(models.Model):
    """Row associating a WorkflowJobTemplateNode with an InstanceGroup, with an optional position."""

    workflowjobtemplatenode = models.ForeignKey('WorkflowJobTemplateNode', on_delete=models.CASCADE)
    instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
    # Nullable, indexed integer; presumably the ordering of the group within the association.
    position = models.PositiveIntegerField(null=True, default=None, db_index=True)


class WorkflowJobNodeBaseInstanceGroupMembership(models.Model):
    """Row associating a WorkflowJobNode with an InstanceGroup, with an optional position."""

    workflowjobnode = models.ForeignKey('WorkflowJobNode', on_delete=models.CASCADE)
    instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
    # Nullable, indexed integer; presumably the ordering of the group within the association.
    position = models.PositiveIntegerField(null=True, default=None, db_index=True)


class WorkflowJobInstanceGroupMembership(models.Model):
    """Row associating a WorkflowJob with an InstanceGroup, with an optional position."""

    # NOTE(review): the field is named ``workflowjobnode`` although it references WorkflowJob;
    # the name is part of the database schema, so it is left unchanged here.
    workflowjobnode = models.ForeignKey('WorkflowJob', on_delete=models.CASCADE)
    instancegroup = models.ForeignKey('InstanceGroup', on_delete=models.CASCADE)
    # Nullable, indexed integer; presumably the ordering of the group within the association.
    position = models.PositiveIntegerField(null=True, default=None, db_index=True)