# -*- coding: utf-8 -*-
# Copyright © Cloud Linux GmbH & Cloud Linux Software, Inc 2010-2021 All Rights Reserved
#
# Licensed under CLOUD LINUX LICENSE AGREEMENT
# http://cloudlinux.com/docs/LICENSE.TXT
"""
This module contains DecisionMaker class
"""
__package__ = 'ssa.modules'
import json
import logging
import os
from os.path import isfile
import numpy as np
from ssa.db import setup_database
from .common import Common
from .storage import (
iter_domains_data,
iter_urls_data,
get_url_durations
)
from ..configuration import load_tunables
from ..configuration.schemes import ssa_tunables_schema
from ..internal.constants import report_path
from ..internal.utils import previous_day_date, sentry_init
class DecisionMaker(Common):
    """
    SSA Decision maker implementation.

    Walks through the per-domain / per-URL data read from the database,
    selects URLs that pass the throttling, request number and correlation
    checks, and stores the result as a JSON report in the DM reports
    directory.
    """

    def __init__(self, engine=None):
        """
        :param engine: database engine to read data from; when omitted
                       (or falsy) a new one is created by setup_database()
        """
        super().__init__()
        self.logger = logging.getLogger('decision_maker')
        self.logger.info('DecisionMaker enabled: %s', __package__)
        self.engine = engine if engine else setup_database()

    def __call__(self):
        """
        Run the full cycle: load external tunables, build the report,
        save it to disk (the previous report is renamed first) and
        return it.

        :return: generated report
        """
        self.logger.info('DecisionMaker started')
        self.logger.debug('DecisionMaker loaded config: %s', self.config)
        # tunables are (re)loaded on every run, is_throttling_suitable()
        # reads them during data processing
        self.external_tunables = self.load_external_conf()
        self.logger.debug('DecisionMaker loaded tunables: %s',
                          self.external_tunables)
        report = self.data_processing()
        self.add_json_report(report)
        self.logger.info('DecisionMaker report: %s', report)
        return report

    @staticmethod
    def _report_file(name) -> str:
        """
        Full path to given filename in DM reports directory
        """
        return os.path.join(report_path, name)

    @property
    def current_report_file(self) -> str:
        """
        Full path to current DM report: report.json in DM reports directory
        """
        return self._report_file('report.json')

    @property
    def _empty_report(self) -> dict:
        """
        Returns empty report: previous day date and no domains.
        A new dict (with a new 'domains' list) is built on every access.
        """
        return dict(date=previous_day_date(), domains=[])

    @property
    def solo_filtered_options(self) -> set:
        """
        Returns the set of option names {'correlation'}
        """
        return {'correlation'}

    @staticmethod
    def load_external_conf():
        """Load external configuration values"""
        return load_tunables('ssa.json', ssa_tunables_schema)

    def data_processing(self) -> dict:
        """
        Going through the list of domains, for each domain we go through
        the list of urls. During data processing, we will form
        the resulting dictionary.

        Domains without any selected URL are not included. URLs of each
        domain are sorted by 'reqs_num' and cut to "urls_number" items,
        domains are sorted by 'slow_reqs' and cut to "domains_number" items.

        :return: report dict with 'date' and 'domains' keys
        """
        report = self._empty_report
        for domain_data in iter_domains_data(self.engine):
            urls_data, domain_slow_reqs = self._collect_slow_urls(domain_data)
            if not urls_data:
                continue
            sorted_urls = self.report_sorting(
                list_to_sort=urls_data, leave_top=self.urls_number,
                key_for_sorting='reqs_num')
            # 'slow_reqs' counts every selected URL of the domain,
            # including those cut off by the "urls_number" limit
            report['domains'].append(dict(
                name=domain_data.domain_name, slow_urls=len(sorted_urls),
                slow_reqs=domain_slow_reqs,
                total_reqs=sum(domain_data.domain_total_reqs),
                urls=sorted_urls))
        if report['domains']:
            report['domains'] = self.report_sorting(
                list_to_sort=report['domains'], leave_top=self.domains_number,
                key_for_sorting='slow_reqs')
        return report

    def _collect_slow_urls(self, domain_data) -> tuple:
        """
        Select the URLs of a single domain which should get into the report.

        :param domain_data: item yielded by iter_domains_data(); its
                            'domain_name' and 'domain_total_reqs' are used
        :return: tuple (list of per-URL report dicts,
                        total number of slow requests of the selected URLs)
        """
        urls_data = []
        domain_slow_reqs = 0
        domain_url_durations = dict(get_url_durations(
            self.engine, domain_data.domain_name))
        for url, url_data in iter_urls_data(self.engine,
                                            domain_data.domain_name,
                                            list(domain_url_durations)):
            if self.is_ignored(url):
                self.logger.debug('%s ignored', url)
                continue
            if url in self.non_url_fields:
                # not a URL entry ("domain_total_reqs" is also here)
                continue
            if not self.is_throttling_suitable(
                    url_data.get('url_throttled_reqs', [0] * 24),
                    url_data['url_total_reqs']):
                # skip by allowed throttling percentage
                continue
            correlation_value = self.get_correlation(
                url_data['url_total_reqs'], domain_data.domain_total_reqs)
            durations = domain_url_durations.get(url)
            # an empty collection is treated like a missing one: its mean
            # is NaN, which cannot be converted to int below
            if durations is None or np.size(durations) == 0:
                self.logger.error('Unable to get durations for %s', str(url))
                continue
            if not (self.request_number_exceeded(url_data['url_slow_reqs'])
                    and self.correlation_conditions(correlation_value)):
                continue
            average_duration = np.mean(durations)
            url_slow_reqs_total = sum(url_data['url_slow_reqs'])
            domain_slow_reqs += url_slow_reqs_total
            urls_data.append(dict(
                name=url, reqs_num=url_slow_reqs_total,
                average_duration=int(average_duration),
                # keep two decimal places in the report
                correlation=float(f'{correlation_value:.2f}')))
        return urls_data, domain_slow_reqs

    def list_handling_considering_time(self, url_slow_reqs: list) -> list:
        """
        Based on the 'url_slow_reqs' list, a new list will be formed,
        where the elements of the original list will be iteratively
        summed by the number of elements equal to 'time'
        (24 is used if 'time' is not set).
        E.g. with time == 2 the list [1, 2, 3, 4, 5] gives [3, 7, 5].

        :param url_slow_reqs: list of numbers
        :return: list of sums of consecutive chunks
        """
        window = self.time or 24
        return [sum(url_slow_reqs[start:start + window])
                for start in range(0, len(url_slow_reqs), window)]

    def compare_elements_with_request_number(self,
                                             url_slow_reqs_by_time: list) -> bool:
        """
        This function will check if any of elements is greater than
        or equal to "request_number"
        """
        return any(reqs >= self.request_number
                   for reqs in url_slow_reqs_by_time)

    def get_correlation(self, url_total_reqs: list, domain_total_reqs: list):
        """
        Calculates the correlation coefficient using the "url_total_reqs" and
        the "domain_total_reqs" lists.

        :return: 0 if correlation flag is disabled, otherwise the minimal
                 element of the correlation matrix built by np.corrcoef
                 (diagonal elements of that matrix are self-correlations,
                 so the minimum is expected to be the url/domain
                 coefficient)
        """
        if not self.correlation:
            return 0
        return np.amin(np.corrcoef(url_total_reqs, domain_total_reqs))

    @staticmethod
    def report_sorting(list_to_sort: list, leave_top: int,
                       key_for_sorting: str) -> list:
        """
        Will sort the domain list by "slow_reqs", the goal is to leave only
        "domains_number" of uppers, also per each domain will sort urls by
        "reqs_num", the goal is to leave only "urls_number" of uppers.
        leave_top == 0 allows to keep the full list.

        Note: the passed list is sorted in place (descending order).

        :param list_to_sort: list of dicts
        :param leave_top: number of leading items to keep, 0 - keep all
        :param key_for_sorting: dict key to sort by
        :return: new list with top items, or the same (sorted) list object
                 if leave_top is 0
        """
        list_to_sort.sort(key=lambda dict_: dict_[key_for_sorting],
                          reverse=True)
        if leave_top:
            return list_to_sort[:leave_top]
        return list_to_sort

    def rename_old_report(self):
        """
        Rename old report: report.json -> report__<date>.json, where <date>
        is the 'date' value of the old report with dots replaced by
        underscores. 'dd_mm_yyyy' is used if the report has no 'date',
        'unknown' - if the report could not be decoded, is not a JSON
        object or its 'date' is not a string.
        Does nothing if there is no current report file.
        """
        old_report = self.current_report_file
        if not isfile(old_report):
            return
        try:
            # reports are written in UTF-8, see add_json_report
            with open(old_report, encoding='utf-8') as json_data:
                content = json.load(json_data)
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError are both
            # ValueError subclasses
            content = None
        date_value = (content.get('date', 'dd.mm.yyyy')
                      if isinstance(content, dict) else None)
        date_from_report = (date_value.replace('.', '_')
                            if isinstance(date_value, str) else 'unknown')
        new_report = self._report_file(f'report__{date_from_report}.json')
        # the file is already closed at this point
        os.rename(old_report, new_report)

    def add_json_report(self, report: dict):
        """
        Makes json report: renames the old report (if any) and writes
        the given one to the current report file in UTF-8
        """
        self.rename_old_report()
        with open(self.current_report_file, 'w', encoding='utf-8') as f:
            json.dump(report, f, ensure_ascii=False, indent=4)

    def get_json_report(self) -> dict:
        """
        Return contents of current report or empty report in case of error
        (file could not be opened/read or its contents could not be decoded)
        """
        try:
            # reports are written in UTF-8, see add_json_report
            with open(self.current_report_file, encoding='utf-8') as report:
                return json.load(report)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError
            return self._empty_report

    def correlation_conditions(self, correlation_value: float) -> bool:
        """
        If correlation flag is enabled - we'll compare correlation_coefficient
        from configuration with calculated correlation coefficient.
        If the calculated value exceeds the configuration value - we return
        True otherwise False. At the same time if correlation flag is disabled -
        we'll also return "True" since in this case the correlation coefficient
        is not checked and its value is specified as zero in final report.
        """
        if not self.correlation:
            return True
        return correlation_value > self.correlation_coefficient

    def request_number_exceeded(self, url_slow_reqs: list) -> bool:
        """
        At least one element from the received list (url_slow_reqs_by_time)
        must be greater than or equal to request_number

        :param url_slow_reqs: list of numbers, summed up by chunks of
                              'time' elements before the comparison
        """
        url_slow_reqs_by_time = self.list_handling_considering_time(
            url_slow_reqs)
        return self.compare_elements_with_request_number(url_slow_reqs_by_time)

    def is_throttling_suitable(self, url_throttled_reqs: list,
                               url_total_reqs: list) -> bool:
        """
        Check that percent of throttled requests per URL passes given threshold
        ('allowed_throttling_percentage' external tunable, 0 if not set).

        :param url_throttled_reqs: list of throttled requests numbers
        :param url_total_reqs: list of total requests numbers
        :return: True if throttled percent does not exceed the threshold
        """
        total_reqs = sum(url_total_reqs)
        if total_reqs:
            throttled_percent = (sum(url_throttled_reqs) / total_reqs) * 100
        else:
            # no requests at all, nothing could be throttled;
            # also protects from division by zero
            throttled_percent = 0.0
        self.logger.debug('Calculated throttled percent %s', throttled_percent)
        return throttled_percent <= self.external_tunables.get(
            'allowed_throttling_percentage', 0)
if __name__ == "__main__":
    # Standalone run: enable Sentry reporting first, then send INFO-level
    # logs to a file and execute the decision maker once
    sentry_init()
    logging.basicConfig(level=logging.INFO,
                        filename='decision_maker_standalone.log')
    DecisionMaker()()