"""
This module contains RequestProcessor class
"""
import shelve
import logging
import os
import time
from datetime import datetime, timedelta, date
from threading import Thread, RLock, current_thread
from .common import Common
from .decision_maker import DecisionMaker
from .stat_sender import StatisticsSender
from ..internal.constants import storage_file
from ..internal.exceptions import SSAError
from ..internal.utils import singleton, url_split
@singleton
class RequestProcessor(Common):
    """
    SSA Request processor implementation.
    Only one instance is allowed to be created.

    Collects per-domain/per-URL request counters bucketed by hour of day,
    dumps them to a shelve storage hourly and feeds them to DecisionMaker
    daily from a background daemon thread.
    """

    def __init__(self):
        super().__init__()
        self.logger = logging.getLogger('req_processor')
        self.logger.info('Processor enabled: %s', __package__)
        # Restore stats collected before a previous shutdown.
        # NOTE(review): load_stats() is inherited from Common -- assumed to
        # return the accumulated stats dict and the date it was dumped;
        # confirm against Common.
        self.total_struct, total_struct_date = self.load_stats()
        if not total_struct_date:
            # No dump date recorded -> stored data cannot be trusted,
            # drop the storage file and start from an empty struct
            self.cleanup()
        self._lock = RLock()
        self.decision_maker = DecisionMaker()
        self.sender = StatisticsSender()
        self.start_background_routine()

    @property
    def configured_duration(self):
        """
        Return config file value multiplied by 1000000,
        as we receive duration in microseconds
        """
        return self.requests_duration * 1000000

    def send_stats(self, report: dict):
        """
        Call Statistics Sender.

        Best-effort: a failed send is logged and never propagated.
        """
        try:
            self.sender.send(report)
        except SSAError as e:
            self.logger.error('StatisticsSender failed: %s', str(e))

    def start_background_routine(self) -> None:
        """
        Start dumper|DecisionMaker thread in background
        """
        # Daemon thread: must not keep the process alive on shutdown
        t = Thread(target=self.background_routine, daemon=True)
        t.start()
        self.logger.info('[%s] Routine started', t.name)

    def background_routine(self) -> None:
        """
        Dumps collected stats to file once an hour.
        Runs DecisionMaker once a day
        Cleanup storage after DecisionMaker run
        """
        while True:
            tick = datetime.now()
            if tick.minute == 0:
                self.logger.info('[%s] Routine thread launching dump (%s)',
                                 current_thread().name, tick)
                self.dump_collected_stats_to_file()
                if tick.hour == 0:
                    # Midnight: build the daily report and reset counters
                    self.logger.info(
                        '[%s] Routine thread launching DecisionMaker (%s)',
                        current_thread().name, tick)
                    report = self.decision_maker()
                    self.cleanup()
                    self.send_stats(report)
                # BUGFIX: sleep past minute 0 on EVERY hour, not only at
                # midnight. Previously this sleep sat inside the
                # `tick.hour == 0` branch, so for the other 23 hours the
                # loop busy-spun and re-dumped stats (re-acquiring the lock
                # and rewriting the shelve file) for the whole first minute
                # of the hour.
                self._simple_sleep(60)
            else:
                self._sleep_till_next_hour(tick.minute)

    def _simple_sleep(self, to_sleep: int = 15 * 60):
        """
        Log and sleep given number of seconds or 15 minutes by default
        """
        self.logger.info('[%s] Routine thread sleeping for (%s)',
                         current_thread().name, to_sleep)
        time.sleep(to_sleep)

    def _sleep_till_next_hour(self, start_minute):
        """
        Sleep the number of minutes remaining till next hour
        """
        sleep_for = (timedelta(hours=1) - timedelta(
            minutes=start_minute)).total_seconds()
        self._simple_sleep(int(sleep_for))

    def dump_collected_stats_to_file(self) -> dict:
        """
        Dump collected stats to file.

        Returns the storage contents after the dump, or an empty dict
        when the shelve file could not be written.
        """
        with self._lock:
            self.logger.debug('[%s] Acquires lock to dump stats',
                              current_thread().name)
            # Stamp the struct so the next startup can tell how fresh it is
            self.add_current_date()
            try:
                with shelve.open(storage_file) as db:
                    for item, value in self.total_struct.items():
                        db[item] = value
                    dump_result = {k: v for k, v in db.items()}
            except OSError as e:
                self.logger.error(
                    'Failed to dump data',
                    extra={'err': str(e)})
                dump_result = dict()
            self.logger.debug('[%s] Released lock to dump stats',
                              current_thread().name)
        return dump_result

    @staticmethod
    def get_interval_for(timestamp: int) -> int:
        """
        Return the hour of the day (0-23) to which the given UNIX
        timestamp belongs.

        Uses local time, matching the datetime.now() calls driving
        the background routine.
        """
        return datetime.fromtimestamp(timestamp).hour

    def add_current_date(self) -> None:
        """
        Adds current date to the 'total struct' dict before dumping to a file
        """
        self.total_struct['current_date'] = date.today()

    def add_domain(self, name: str) -> None:
        """
        Add new domain sub-struct if it is not already present
        """
        if name not in self.total_struct:
            self.logger.debug('[%s] New domain received: %s',
                              current_thread().name, name)
            # One request counter per hour of the day
            self.total_struct[name] = dict(
                domain_total_reqs=list([0] * 24)
            )

    def add_url(self, domain: str, url: str) -> None:
        """
        Add new URL sub-struct if it is not already present
        """
        if url not in self.total_struct[domain]:
            self.logger.debug('[%s] New URL received: %s',
                              current_thread().name, url)
            # Hourly total/slow counters plus raw duration samples
            self.total_struct[domain][url] = dict(
                url_total_reqs=list([0] * 24),
                url_slow_reqs=list([0] * 24),
                durations=list()
            )

    def update_data(self, *, domain: str, url: str, timestamp: int,
                    duration: float) -> None:
        """
        Update request counters for given domain and url,
        save request duration
        """
        interval = self.get_interval_for(timestamp)
        self.total_struct[domain]['domain_total_reqs'][interval] += 1
        self.total_struct[domain][url]['url_total_reqs'][interval] += 1
        # configured_duration is already scaled to microseconds
        if duration > self.configured_duration:
            self.total_struct[domain][url]['url_slow_reqs'][interval] += 1
        self.total_struct[domain][url]['durations'].append(duration)
        self.logger.info('[%s] Request to %s processed',
                         current_thread().name, url)
        self.logger.debug('[%s] %s', current_thread().name, self.total_struct)

    def handle(self, data: dict) -> None:
        """
        Process given request data.

        Expects 'url', 'timestamp' and 'duration' keys in *data*;
        requests to ignored URLs are dropped early.
        """
        url = data.get('url')
        if self.is_ignored(url):
            self.logger.debug('%s ignored', url)
            return
        # Only the domain part is needed here; counters are keyed by full URL
        domain, _ = url_split(url)
        with self._lock:
            self.logger.debug('[%s] Acquires lock to handle request counters',
                              current_thread().name)
            self.add_domain(domain)
            self.add_url(domain, url)
            self.update_data(domain=domain,
                             url=url,
                             timestamp=int(data.get('timestamp')),
                             duration=float(data.get('duration')))
            self.logger.debug('[%s] Released lock to handle request counters',
                              current_thread().name)

    def cleanup(self):
        """
        Cleanup storage and total_struct
        """
        self.logger.info('RequestProcessor cleanup...')
        try:
            os.unlink(storage_file)
        except OSError:
            # Missing file is the expected state after a clean start
            self.logger.info('Already no storage file')
        self.total_struct = dict()