#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Filename: mma.py
# Project: mixins
# Author: Brian Cherinka
# Created: Thursday, 8th October 2020 11:23:28 am
# License: BSD 3-clause "New" or "Revised" License
# Copyright (c) 2020 Brian Cherinka
# Last Modified: Thursday, 8th October 2020 11:23:28 am
# Modified By: Brian Cherinka
from __future__ import print_function, division, absolute_import
import abc
import six
import pathlib
import os
import warnings
from sdss_brain import log
from sdss_brain.mixins.access import AccessMixIn
from sdss_brain.config import config
from sdss_brain.exceptions import BrainError
# Names exported by ``from <this module> import *``: the two classes defined in this file.
__all__ = ['MMAMixIn', 'MMAccess']
class MMAMixIn(abc.ABC):
    ''' Mixin for implementing multi-modal data access

    This is a mixin class that adds multi-modal data access to any class
    that subclasses from this one. The MMA allows toggling between local
    and remote data access modes, or leaving it on automatic. Local mode
    access tries to load data via a database, if one exists, otherwise it loads
    data via a local filepath. Remote mode will try to load data over an API.
    When the mode is set to "auto", it automatically tries to first load things
    locally, and then remotely. Depending on the mode and logic, the MMA will
    set data_origin to either `file`, `db`, or `api`.

    Note that this class does not provide the logic for loading data from a db, over an API,
    or from a file. The user must provide that logic in a subclass.

    This mixin contains three abstractmethods you must override when subclassing.

    - **_parse_input**: provides logic to parse ``data_input`` into either filename or objectid
    - **download**: a method for downloading a data file to a local disk
    - **get_full_path**: a method for generating the absolute file path on disk to a file

    Parameters
    ----------
    data_input : str
        The file or name of target data to load
    filename : str
        The absolute filepath to data to load
    objectid : str
        The object identifier of the data to load
    mode : str
        The operating mode: auto, local, or remote
    release : str
        The data release of the object, e.g. "DR16"
    download : bool
        If True, downloads the object locally with sdss_access
    ignore_db : bool
        If True, ignores any database connection for local access
    use_db : `~sdssdb.connection.DatabaseConnection`
        a database connection to override the default with

    Attributes
    ----------
    release : str
        The current data release loaded
    '''

    def __init__(self, data_input: str = None, filename: str = None, objectid: str = None,
                 mode: str = None, release: str = None, download: bool = None,
                 ignore_db: bool = False, use_db: object = None):
        # data attributes
        self._db = use_db
        self.filename = filename
        self.objectid = objectid
        self.data_origin = None

        # inputs or config variables
        self.mode = mode or config.mode
        self._release = release or config.release
        # only fall back to the config when the caller gave no value, so that an
        # explicit ``download=False`` is not overridden by a truthy config default
        self._forcedownload = config.download if download is None else download
        self._ignore_db = ignore_db or config.ignore_db

        # determine the input
        self._determine_inputs(data_input)

        assert self.mode in ['auto', 'local', 'remote'], 'mode must be auto, local, or remote'
        assert self.filename is not None or self.objectid is not None, 'no inputs set.'

        # perform the multi-modal data access
        if self.mode == 'local':
            self._do_local()
        elif self.mode == 'remote':
            self._do_remote()
        elif self.mode == 'auto':
            try:
                self._do_local()
            except BrainError:
                if self.filename:
                    # If the input contains a filename we don't want to go into remote mode.
                    # A bare raise keeps the original traceback intact.
                    raise
                log.debug('local mode failed. Trying remote now.')
                self._do_remote()

        # Sanity check to make sure data_origin has been properly set.
        assert self.data_origin in ['file', 'db', 'api'], 'data_origin is not properly set.'

    @property
    def release(self) -> str:
        """ Returns the release. """
        return self._release

    @release.setter
    def release(self, value: str):
        """Fails when trying to set the release after instantiation."""
        raise BrainError('the release cannot be changed once the object has been instantiated.')

    def _do_local(self) -> None:
        """ Check if it's possible to load the data locally.

        With a filename, the file must exist. With an objectid, a connected
        database is used unless ignored; otherwise the path from
        ``get_full_path`` is used if it exists on disk; otherwise the file is
        downloaded when downloading is enabled. On success ``mode`` is set to
        "local" and ``data_origin`` to "file" or "db".

        Raises
        ------
        BrainError
            If the input file is not found, or if the data cannot be located
            locally and downloading is not enabled
        """
        if self.filename:
            # check if the file exists locally
            if not self.filename.exists():
                raise BrainError('input file {0} not found'.format(self.filename))
            self.mode = 'local'
            self.data_origin = 'file'
        elif self.objectid:
            # prioritize a database unless explicitly set to ignore
            if self._db and self._db.connected and not self._ignore_db:
                self.mode = 'local'
                self.data_origin = 'db'
                return

            # retrieve the full local sdss_access path
            fullpath = self.get_full_path()
            if fullpath and os.path.exists(fullpath):
                self.mode = 'local'
                self.filename = pathlib.Path(fullpath)
                self.data_origin = 'file'
            elif self._forcedownload:
                # optionally download the file
                self.download()
                # record the resolved mode, as every other successful local branch does
                self.mode = 'local'
                self.data_origin = 'file'
            else:
                raise BrainError('failed to retrieve data using '
                                 'input parameters.')

    def _do_remote(self) -> None:
        """ Check if remote connection is possible.

        Sets ``mode`` to "remote" and ``data_origin`` to "api".

        Raises
        ------
        BrainError
            If a filename is set, since a filename is not allowed in remote mode
        """
        if self.filename:
            raise BrainError('filename not allowed in remote mode.')
        self.mode = 'remote'
        self.data_origin = 'api'

    def _determine_inputs(self, data_input: str) -> None:
        """ Determines what inputs to use in the decision tree.

        Parses ``data_input`` (when given) into a filename or objectid,
        converts any filename into an absolute `pathlib.Path`, updates the
        access parameters, and validates the final combination of inputs.

        Parameters
        ----------
        data_input : str
            The input string to attempt to parse into a filename or object id

        Raises
        ------
        TypeError
            If ``data_input`` is not a string or `pathlib.Path`
        BrainError
            If neither a filename nor an objectid can be determined, if a
            filename is used in remote mode, or if the filename does not exist
        """
        parsed_input = None

        if data_input:
            assert self.filename is None and self.objectid is None, \
                'if input is set, filename and objectid cannot be set.'

            if not isinstance(data_input, (six.string_types, pathlib.Path)):
                raise TypeError('input must be a string or pathlib.Path')

            # parse the input data into either a filename or objectid
            parsed_input = self._parse_input(data_input)
            if not parsed_input:
                # nothing could be parsed; treat the raw input as a filename
                self.filename = data_input
            else:
                assert isinstance(
                    parsed_input, dict), 'return value of _parse_input must be a dict'
                self.filename = parsed_input.get('filename', None)
                self.objectid = parsed_input.get('objectid', None)

        # ensure either filename or objectid is specified
        if self.filename is None and self.objectid is None:
            raise BrainError('no inputs defined. filename and objectid are both None')

        # convert filename to a pathlib.Path and resolve a relative name
        # not using pathlib.resolve to preserve symlinks
        if self.filename:
            self.filename = pathlib.Path(os.path.abspath(self.filename))

            # issue a warning if the release is not indicated in the filename; possible mismatch
            # (both sides are lower-cased so the comparison is case-insensitive)
            if self.release.lower() not in self.filename.as_posix().lower():
                warnings.warn('Your filename may not match the release indicated. Path parameters '
                              'may not be extracted properly. Try setting the release to match the '
                              'known file version.')

        # attempt to update the access path parameters from the filename or parsed data input
        self._update_access_params(params=parsed_input)

        # check for any misaligments and misassignments
        if self.filename:
            self.objectid = None

            if self.mode == 'remote':
                raise BrainError('filename not allowed in remote mode.')

            # ``exists`` must be called; testing the bound method itself is always
            # truthy.  BrainError matches what _do_local raises for a missing file.
            if not self.filename.exists():
                raise BrainError('filename {} does not exist.'.format(str(self.filename)))
        elif self.objectid:
            assert not self.filename, 'invalid set of inputs.'

    @abc.abstractmethod
    def _parse_input(self, value: str) -> dict:
        ''' Parses the input value to determine the kind of input

        This method must be overridden by each subclass and contains the logic
        to determine the kind of input passed into it, i.e. either a filename or an
        object identification string.  This method accepts a single argument which is the
        string `data_input` and must return a dictionary containing at least keys
        for "filename" and "objectid".

        Parameters
        ----------
        value : str
            The data_input string to attempt to parse
        '''

    @abc.abstractmethod
    def download(self) -> None:
        ''' Abstract method to download a file '''
        pass

    @abc.abstractmethod
    def get_full_path(self) -> str:
        ''' Abstract method to return a full local file path '''
        pass

    @property
    def is_access_mixedin(self) -> bool:
        ''' Checks if the `~sdss_brain.mixins.access.AccessMixIn` is included '''
        return hasattr(self, 'path_name') and hasattr(self, 'access')

    def _update_access_params(self, params: dict = None) -> None:
        ''' Updates the path_params attribute with extracted parameters

        When the access mixin is present and a ``path_name`` is set, parameters
        are extracted from the filename, or built from the objectid inputs.
        Without the access mixin, any parsed parameters are set directly as
        instance attributes.

        Parameters
        ----------
        params : dict
            The output from the _parse_input method
        '''
        if self.is_access_mixedin and self.path_name:
            if self.filename:
                # attempt to extract the path_params from the filename
                params = self.access.extract(self.path_name, self.filename)
                if params:
                    self._setup_access(params, origin='file')
            elif self.objectid:
                # set attributes from extracted parse_input
                self._set_parsed_attributes(params)
                # run the set_access_path_params method
                self._set_access_path_params()
                # set attributes from the path_params
                self._setup_access(self.path_params, origin='object')
        elif not self.is_access_mixedin and params:
            # for non-access, set attributes from extracted parse_input
            self._set_parsed_attributes(params)

    def _set_parsed_attributes(self, params: dict) -> None:
        """ Set instance attributes from any extracted params from parse_input

        Keys reserved for the input bookkeeping ("filename", "objectid",
        "parsed_groups", "parsed_inputs") and keys that already exist as class
        attributes are skipped.

        Parameters
        ----------
        params : dict
            The output from the _parse_input method

        Raises
        ------
        TypeError
            If ``params`` is non-empty and not a dictionary
        """
        if not params:
            return

        # isinstance also accepts dict subclasses, e.g. OrderedDict
        if not isinstance(params, dict):
            raise TypeError('the output of parse_input must be a dictionary')

        # _version is not defined in this class (presumably supplied by the consuming
        # class - verify); tolerate it being absent or None
        versions = getattr(self, '_version', None) or {}
        reserved = ('filename', 'objectid', 'parsed_groups', 'parsed_inputs')

        for key, val in params.items():
            # skip reserved keys, and skip if a class attribute already exists
            if key in reserved or hasattr(self.__class__, key):
                continue

            # if a work version is set for the given key, and no value is set, then
            # use the work version
            work_ver = versions.get(key, None)
            if work_ver and val is None:
                val = work_ver
            setattr(self, key, val)
class MMAccess(AccessMixIn, MMAMixIn):
    """ Class that mixes in the sdss_access functionality with the MMA

    This is a mixin class that adds multi-modal data access to any class
    that subclasses from this one. The MMA allows toggling between local
    and remote data access modes, or leaving it on automatic. Local mode
    access tries to load data via a database, if one exists, otherwise it loads
    data via a local filepath. Remote mode will try to load data over an API.
    When the mode is set to "auto", it automatically tries to first load things
    locally, and then remotely. Depending on the mode and logic, the MMA will
    set data_origin to either `file`, `db`, or `api`.

    Note that this class does not provide the logic for loading data from a db, over an API,
    or from a file. The user must provide that logic in a subclass.

    This mixin contains two abstractmethods you must override when subclassing.

    - **_set_access_path_params**: sets the arguments needed by `sdss_access`
    - **_parse_input**: provides logic to parse ``data_input`` into either filename or objectid

    Parameters
    ----------
    data_input : str
        The file or name of target data to load
    filename : str
        The absolute filepath to data to load
    objectid : str
        The object identifier of the data to load
    mode : str
        The operating mode: auto, local, or remote
    release : str
        The data release of the object, e.g. "DR16"
    download : bool
        If True, downloads the object locally with sdss_access
    ignore_db : bool
        If True, ignores any database connection for local access
    use_db : `~sdssdb.connection.DatabaseConnection`
        a database connection to override the default with

    Attributes
    ----------
    release : str
        The current data release loaded
    access : `~sdss_access.sync.Access`
        An instance of ``sdss_access`` using for all path creation and file downloads
    """
    pass