Source code for bella.helper

'''
Functions that are used throughout the package:

1. :py:func:`bella.helper.read_config` -- Given the path to a YAML file that
   stores only keys with relative or full path locations as values, returns
   the path stored under the given key as a fully resolved String.
2. :py:func:`download_file` -- Given a file path and the URL address of the
   data, downloads the data from the URL to the file path location.
3. :py:func:`download_model` -- Downloads the specified model trained on the
   dataset from the model zoo and returns it as a trained model to use.
'''
from pathlib import Path

import requests
from ruamel.yaml import YAML

import bella


BELLA_MODEL_DIR = Path.home().joinpath('.Bella', 'Models')


def read_config(key: str, config_file_path: Path) -> str:
    '''
    Given the path to a YAML file that stores only keys with relative or
    full path locations as values, returns the path stored under the given
    key as a fully resolved String.

    Relative paths in the config file are resolved against the directory
    that contains the config file.

    :param key: The key to the Path within the config file.
    :param config_file_path: File path to the YAML configuration file.
    :return: Full path as a String to the relative file location that is
             stored in the key of the config file.
    :raises ValueError: If the key does not exist in the config file. This
                        includes the case where the config file is empty.
    '''

    # Only keep the file open for as long as it takes to parse it.
    with config_file_path.open('r') as config_file:
        config_data = YAML().load(config_file)

    # An empty YAML document loads as None, on which a membership test would
    # raise a TypeError; report it as a missing key instead.
    if config_data is None or key not in config_data:
        raise ValueError(f'This key {key} does not exist in the '
                         f'config file {config_file_path}')

    key_file_path = config_file_path.parent.joinpath(config_data[key])
    return str(key_file_path.resolve())


def download_file(file_path: Path, url: str) -> None:
    '''
    Given a file path and the URL address of the data, downloads the data
    from the URL to the file path location.

    The data is first written to a temporary file next to ``file_path`` and
    only moved into place once the whole download has succeeded. A failed or
    interrupted download therefore never leaves a partial file at
    ``file_path``.

    :param file_path: Path to save the downloaded data to.
    :param url: URL location of the data to be downloaded.
    :return: Nothing
    :raises requests.HTTPError: If the server responds with an error status
                                code (4xx or 5xx).
    '''
    temp_path = file_path.with_name(file_path.name + '.part')
    try:
        # The context manager releases the streamed connection when done.
        with requests.get(url, stream=True) as response:
            # Without this an error page would be saved as if it were the
            # requested data.
            response.raise_for_status()
            with temp_path.open('wb') as a_file:
                for chunk in response.iter_content(chunk_size=8192):
                    a_file.write(chunk)
        # Move into place only after every chunk has been written.
        temp_path.replace(file_path)
    except BaseException:
        # Clean up the partial download (also on KeyboardInterrupt) and let
        # the original error propagate to the caller.
        if temp_path.exists():
            temp_path.unlink()
        raise


def download_model(model: 'bella.models.base.BaseModel',
                   dataset_name: str) -> 'bella.models.base.BaseModel':
    '''
    Downloads the specified model trained on the dataset from the
    model zoo and returns it as a trained model to use.

    The model zoo `URL <https://delta.lancs.ac.uk/mooreap/bella-models>`_

    The model zoo came from the following
    `paper <https://aclanthology.coli.uni-saarland.de/papers/C18-1097/c18-1097>`_
    and results for those models on the relevant dataset are within the
    Mass Evaluation section of the paper.

    Files are stored in ``BELLA_MODEL_DIR`` and are only downloaded when
    they are not already present there. Models whose name contains ``LSTM``
    are stored as two files (``.pkl`` and ``.h5``), all other models as a
    single file.

    :param model: Class of model you want to download
    :param dataset_name: Name of the dataset that the model has been trained
                         on.
    :return: An instance of the model class you gave as an argument trained
             on the dataset specified.
    '''

    BELLA_MODEL_DIR.mkdir(parents=True, exist_ok=True)
    zoo_url = 'https://delta.lancs.ac.uk/mooreap/bella-models/raw/master/'

    name = model.name()
    file_name = f'{name} {dataset_name}'
    model_path = BELLA_MODEL_DIR.joinpath(file_name)

    # Pairs of (local file, file name within the model zoo) that are required
    # before the model can be loaded.
    if 'LSTM' in name:
        local_paths = [model_path.with_suffix(suffix)
                       for suffix in ('.pkl', '.h5')]
        required = [(local_path, local_path.name)
                    for local_path in local_paths]
    else:
        required = [(model_path, file_name)]

    for local_path, remote_name in required:
        if local_path.is_file():
            continue
        remote_url = (zoo_url + remote_name).replace(' ', '%20')
        download_file(local_path, remote_url)
    return model.load(model_path)