Source code for sdss_brain.mixins.mma

# !/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Filename: mma.py
# Project: mixins
# Author: Brian Cherinka
# Created: Thursday, 8th October 2020 11:23:28 am
# License: BSD 3-clause "New" or "Revised" License
# Copyright (c) 2020 Brian Cherinka
# Last Modified: Thursday, 8th October 2020 11:23:28 am
# Modified By: Brian Cherinka


from __future__ import print_function, division, absolute_import

import abc
import six
import pathlib
import os

from sdss_brain import log
from sdss_brain.mixins.access import AccessMixIn
from sdss_brain.config import config
from sdss_brain.exceptions import BrainError


__all__ = ['MMAMixIn', 'MMAccess']


class MMAMixIn(abc.ABC):
    ''' Mixin for implementing multi-modal data access

    This is a mixin class that adds multi-modal data access to any class that
    subclasses from this one.  The MMA allows toggling between local and remote
    data access modes, or leaving it on automatic.  Local mode access tries to
    load data via a database, if one exists, otherwise it loads data via a local
    filepath.  Remote mode will try to load data over an API.  When the mode is
    set to "auto", it automatically tries to first load things locally, and then
    remotely.  Depending on the mode and logic, the MMA will set data_origin to
    either `file`, `db`, or `api`.

    Note that this class does not provide the logic for loading data from a db,
    over an API, or from a file.  The user must provide that logic in a subclass.

    This mixin contains three abstractmethods you must override when subclassing.

    - **_parse_input**: provides logic to parse ``data_input`` into either filename or objectid
    - **download**: a method for downloading a data file to a local disk
    - **get_full_path**: a method for generating the absolute file path on disk to a file

    Parameters
    ----------
        data_input : str
            The file or name of target data to load
        filename : str
            The absolute filepath to data to load
        objectid : str
            The object identifier of the data to load
        mode : str
            The operating mode: auto, local, or remote
        release : str
            The data release of the object, e.g. "DR16"
        download : bool
            If True, downloads the object locally with sdss_access.  When left
            as None, the value of ``config.download`` is used.
        ignore_db : bool
            If True, ignores any database connection for local access
        use_db : `~sdssdb.connection.DatabaseConnection`
            a database connection to override the default with

    Attributes
    ----------
        release : str
            The current data release loaded
    '''

    def __init__(self, data_input=None, filename=None, objectid=None, mode=None,
                 release=None, download=None, ignore_db=False, use_db=None):
        # data attributes
        self._db = use_db
        self.filename = filename
        self.objectid = objectid
        self.data_origin = None

        # inputs or config variables
        self.mode = mode or config.mode
        self._release = release or config.release
        # Only fall back to the config when download was not given at all, so an
        # explicit download=False is honoured even if the config default is True.
        self._forcedownload = config.download if download is None else download
        self._ignore_db = ignore_db or config.ignore_db

        # determine the input
        self._determine_inputs(data_input)

        assert self.mode in ['auto', 'local', 'remote']
        assert self.filename is not None or self.objectid is not None, 'no inputs set.'

        # perform the multi-modal data access
        if self.mode == 'local':
            self._do_local()
        elif self.mode == 'remote':
            self._do_remote()
        elif self.mode == 'auto':
            try:
                self._do_local()
            except BrainError:
                if self.filename:
                    # If the input contains a filename we don't want to go into remote mode.
                    raise
                log.debug('local mode failed. Trying remote now.')
                self._do_remote()

        # Sanity check to make sure data_origin has been properly set.
        assert self.data_origin in ['file', 'db', 'api'], 'data_origin is not properly set.'

    @property
    def release(self):
        """ Returns the release. """
        return self._release

    @release.setter
    def release(self, value):
        """Fails when trying to set the release after instantiation."""
        raise BrainError('the release cannot be changed once the object has been instantiated.')

    def _do_local(self):
        """ Check if it's possible to load the data locally.

        On success sets ``mode`` to "local" and ``data_origin`` to "file" or "db".

        Raises
        ------
            BrainError
                If an input filename does not exist, or if an objectid cannot be
                resolved to a database, an existing local file, or a download.
        """

        if self.filename:
            # check if the file exists locally
            if self.filename.exists():
                self.mode = 'local'
                self.data_origin = 'file'
            else:
                raise BrainError('input file {0} not found'.format(self.filename))

        elif self.objectid:
            # prioritize a database unless explicitly set to ignore
            if self._db and self._db.connected and not self._ignore_db:
                self.mode = 'local'
                self.data_origin = 'db'
            else:
                # retrieve the full local sdss_access path
                fullpath = self.get_full_path()

                if fullpath and os.path.exists(fullpath):
                    self.mode = 'local'
                    self.filename = fullpath
                    self.data_origin = 'file'
                elif self._forcedownload:
                    # optionally download the file; the data is then read from
                    # disk, so record the mode as local like the branches above
                    self.download()
                    self.mode = 'local'
                    self.data_origin = 'file'
                else:
                    raise BrainError('failed to retrieve data using '
                                     'input parameters.')

    def _do_remote(self):
        """ Check if remote connection is possible.

        Raises
        ------
            BrainError
                If a filename has been set, since files cannot be accessed remotely.
        """

        if self.filename:
            raise BrainError('filename not allowed in remote mode.')

        self.mode = 'remote'
        self.data_origin = 'api'

    def _determine_inputs(self, data_input):
        """ Determines what inputs to use in the decision tree.

        Parameters
        ----------
            data_input : str or pathlib.Path
                The generic input, parsed by ``_parse_input`` into a filename
                or an objectid.  May be None when filename or objectid was
                passed explicitly.

        Raises
        ------
            BrainError
                If neither a filename nor an objectid can be determined, or if
                a filename is used in remote mode.
        """

        if data_input:
            assert self.filename is None and self.objectid is None, \
                'if input is set, filename and objectid cannot be set.'

            # plain ``str`` is used for the check since this class already
            # requires Python 3 (it derives from abc.ABC)
            assert isinstance(data_input, (str, pathlib.Path)), \
                'input must be a string or pathlib.Path'

            # parse the input data into either a filename or objectid
            parsed_input = self._parse_input(data_input)
            if not parsed_input:
                # nothing could be parsed; treat the raw input as a filename
                self.filename = data_input
            else:
                assert isinstance(
                    parsed_input, dict), 'return value of _parse_input must be a dict'
                self.filename = parsed_input.get('filename', None)
                self.objectid = parsed_input.get('objectid', None)

        # ensure either filename or objectid is specified
        if self.filename is None and self.objectid is None:
            raise BrainError('no inputs defined. filename and objectid are both None')

        # convert filename to a pathlib.Path and resolve a relative name
        if self.filename:
            self.filename = pathlib.Path(self.filename).resolve()

        # attempt to update the access path parameters from the filename or parsed data input
        self._update_access_params()

        # check for any misalignments and misassignments
        if self.filename:
            # a filename always takes precedence over an objectid
            self.objectid = None

            if self.mode == 'remote':
                raise BrainError('filename not allowed in remote mode.')

            # The existence of the file is verified in _do_local, which raises
            # a BrainError when the file is missing.
        elif self.objectid:
            assert not self.filename, 'invalid set of inputs.'

    @abc.abstractmethod
    def _parse_input(self, value):
        ''' Parses the input value to determine the kind of input

        This method must be overridden by each subclass and contains the logic
        to determine the kind of input passed into it, i.e. either a filename or an
        object identification string.  This method accepts a single argument which is
        the string `data_input` and must return a dictionary containing at least
        keys for "filename" and "objectid".  A falsy return value causes the
        input to be treated as a filename.
        '''

    @abc.abstractmethod
    def download(self):
        ''' Abstract method to download a file '''
        pass

    @abc.abstractmethod
    def get_full_path(self):
        ''' Abstract method to return a full local file path '''
        pass

    @property
    def is_access_mixedin(self):
        ''' Checks if the `~sdss_brain.mixins.access.AccessMixIn` is included '''
        return hasattr(self, 'path_name') and hasattr(self, 'access')

    def _update_access_params(self):
        ''' Updates the path_params attribute with extracted parameters

        Does nothing unless the access mixin attributes are present and a
        ``path_name`` is set.
        '''

        if self.is_access_mixedin and self.path_name:
            if self.filename:
                # extract the path parameters from the given filename
                params = self.access.extract(self.path_name, self.filename)
                if params:
                    self._setup_access(params)
            elif self.objectid:
                # build the path parameters from the object identifier
                self._set_access_path_params()
                self._setup_access(self.path_params)
class MMAccess(AccessMixIn, MMAMixIn):
    """ Class that mixes in the sdss_access functionality with the MMA

    Combines the sdss_access mixin with multi-modal data access, for use by any
    class that subclasses from this one.  The MMA allows toggling between local
    and remote data access modes, or leaving it on automatic.  Local mode access
    tries to load data via a database, if one exists, otherwise it loads data
    via a local filepath.  Remote mode will try to load data over an API.  When
    the mode is set to "auto", it automatically tries to first load things
    locally, and then remotely.  Depending on the mode and logic, the MMA will
    set data_origin to either `file`, `db`, or `api`.

    Note that this class does not provide the logic for loading data from a db,
    over an API, or from a file.  The user must provide that logic in a subclass.

    This mixin contains two abstractmethods you must override when subclassing.

    - **_set_access_path_params**: sets the arguments needed by `sdss_access`
    - **_parse_input**: provides logic to parse ``data_input`` into either filename or objectid

    Parameters
    ----------
        data_input : str
            The file or name of target data to load
        filename : str
            The absolute filepath to data to load
        objectid : str
            The object identifier of the data to load
        mode : str
            The operating mode: auto, local, or remote
        release : str
            The data release of the object, e.g. "DR16"
        download : bool
            If True, downloads the object locally with sdss_access
        ignore_db : bool
            If True, ignores any database connection for local access
        use_db : `~sdssdb.connection.DatabaseConnection`
            a database connection to override the default with

    Attributes
    ----------
        release : str
            The current data release loaded
        access : `~sdss_access.sync.Access`
            An instance of ``sdss_access`` using for all path creation and file downloads
    """