Skip to content

Plugin

HarvestingPlugin

Bases: SingletonPlugin, DefaultDatasetForm

Custom plugin to deal with harvesting-related customizations.

This class exists in order to work around a bug in ckanext-spatial:

https://github.com/ckan/ckanext-spatial/issues/277

This bug prevents a CKAN extension plugin from implementing both the IValidators and the ISpatialHarvester interfaces at the same time.

As an alternative, we have implemented the current plugin class with the aim to use it strictly for customization of the harvesters (i.e. implement the ISpatialHarvester interface) while the main plugin class (saeoss_plugin.SaeossPlugin) is still handling all of the other SAEOSS customizations.

get_package_dict

get_package_dict(context, data_dict)

Extension point required by ISpatialHarvester

Source code in ckanext/saeoss/plugins/harvesting_plugin.py
def get_package_dict(
    self, context: typing.Dict, data_dict: typing.Dict[str, typing.Any]
) -> typing.Dict[str, typing.Any]:
    """Extension point required by ISpatialHarvester.

    Build the dictionary used to create/update a dataset from harvested
    metadata.

    :param context: harvesting context; not read by this method.
    :param data_dict: harvested data. The keys read here are
        ``"package_dict"`` (the package dict prepared by the harvester) and
        ``"iso_values"`` (the values parsed from the ISO document); both
        default to an empty dict when missing.
    :returns: the data dict produced by ``_CkanSaeossDataset.to_data_dict()``,
        updated with the (extras-expanded) harvested package dict, a set of
        placeholder values, and the ``id`` / ``metadata_modified`` taken from
        the ISO values. The ``extras`` key is removed from the result.
    """
    package_dict = data_dict.get("package_dict", {})
    iso_values = data_dict.get("iso_values", {})

    parsed_resources = [
        _CkanResource(
            url=resource_dict.get("url"),
            format=resource_dict.get("format"),
            format_version="change me",  # placeholder value
            name=resource_dict.get("name"),
            description=resource_dict.get("description") or None,
        )
        for resource_dict in package_dict.get("resources", [])
    ]
    parsed_tags = [
        {"name": parsed_tag.get("name", ""), "vocabulary_id": None}
        for parsed_tag in package_dict.get("tags", [])
    ]

    # TODO: the dataset language ("dataset-language") and the equivalent scale
    # ("equivalent-scale") found in iso_values are not used yet; the
    # placeholders further down are written instead.
    iso_topic_category = _get_possibly_list_item(iso_values, "topic-category")

    package_dict = _get_extras_subfields(package_dict)

    dataset = _CkanSaeossDataset(
        type="dataset",
        private=True,
        featured=False,
        name=package_dict.get("name"),
        title=package_dict.get("title"),
        notes=package_dict.get("notes"),
        iso_topic_category=iso_topic_category or "",
        owner_org=package_dict.get("owner_org", "kartoza"),
        maintainer=iso_values.get("contact"),
        maintainer_email=iso_values.get("contact-email"),
        license_id=LicenseNotSpecified.id,  # set this default and let publisher adjust
        spatial=_get_spatial_field(package_dict),
        resources=parsed_resources,
        tags=parsed_tags,
        source=None,
        version=None,
    )

    new_data_dict = dataset.to_data_dict()
    # Values coming from the harvested package dict take precedence over the
    # ones generated by the dataset object.
    new_data_dict.update(package_dict)

    # Fillers, remove these as you go
    new_data_dict["metadata_language_and_character_set-0-metadata_character_set"] = "ucs-2"
    new_data_dict["metadata_language_and_character_set-0-dataset_character_set"] = "ucs-2"
    new_data_dict["spatial_parameters-0-equivalent_scale"] = "5000"
    new_data_dict["spatial_parameters-0-spatial_representation_type"] = "001"
    new_data_dict["lineage_statement"] = "lineage_statement"

    # pop() instead of del: do not fail with KeyError when there is no
    # "extras" entry to remove.
    new_data_dict.pop("extras", None)

    new_data_dict["id"] = iso_values.get("guid")
    new_data_dict["metadata_modified"] = iso_values.get("date_updated")

    return new_data_dict

get_dataset_reference_date

get_dataset_reference_date(harvested_reference_date, data_dict)

A dataset can have more than one reference date, each with a different type (creation, revision, publication, etc.).

Source code in ckanext/saeoss/plugins/harvesting_plugin.py
def get_dataset_reference_date(
    harvested_reference_date: list,
    data_dict: typing.Dict
) -> typing.Dict:
    """Copy the harvested reference dates into ``data_dict``.

    A dataset can have more than one reference date, each with a different
    type (creation, revision, publication, etc.).

    :param harvested_reference_date: list of dicts; the ``"value"`` and
        ``"type"`` keys of each item are read. ``None`` is treated as an
        empty list.
    :param data_dict: dict that receives the reference date subfields; it is
        modified in place.
    :returns: the same ``data_dict`` object.

    Items whose ``"value"`` is ``None`` are skipped. A value that cannot be
    parsed is logged and stored as ``None``. The subfield index is the
    position of the item in the harvested list.
    """
    for idx, related_date in enumerate(harvested_reference_date or []):
        raw_date = related_date.get("value")
        if raw_date is None:
            continue

        try:
            reference_date = dateutil.parser.parse(raw_date)
        except (dateutil.parser.ParserError, OverflowError):
            # OverflowError is raised by dateutil for out-of-range dates; a
            # single bad date should not abort the harvest.
            logger.exception(
                msg=f"Could not parse {raw_date!r} as a datetime"
            )
            result = None
        else:
            result = reference_date.strftime("%Y-%m-%dT%H:%M:%S")

        reference_date_type = get_reference_date_type(related_date.get("type"))
        reference_date_key = _get_subfield_key("dataset_reference_date", idx)
        reference_date_type_key = _get_subfield_key("dataset_reference_date_type", idx)
        data_dict[reference_date_key] = result
        data_dict[reference_date_type_key] = reference_date_type

    return data_dict

get_reference_date_type

get_reference_date_type(dateType)

With harvesters the date type comes as a name (creation, publication, revision, ...); here we convert it to the corresponding code (001, 002, 003, ...).

Source code in ckanext/saeoss/plugins/harvesting_plugin.py
def get_reference_date_type(dateType: str) -> str:
    """
    Convert a harvested date type name into its code.

    "creation" maps to "001", "publication" to "002" and "revision"
    to "003". Any other value yields None.
    """
    code_by_name = (
        ("revision", "003"),
        ("publication", "002"),
        ("creation", "001"),
    )
    for type_name, type_code in code_by_name:
        if dateType == type_name:
            return type_code
    return None

SaeossPlugin

Bases: SingletonPlugin, DefaultDatasetForm

after_create

after_create(context, pkg_dict)

IPackageController interface requires reimplementation of this method.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def after_create(self, context, pkg_dict):
    """No-op hook kept to satisfy the IPackageController interface.

    Returns the received context and pkg_dict, unchanged, as a tuple.
    """
    unchanged = (context, pkg_dict)
    return unchanged

after_delete

after_delete(context, pkg_dict)

IPackageController interface requires reimplementation of this method.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def after_delete(self, context, pkg_dict):
    """No-op hook kept to satisfy the IPackageController interface.

    Returns the received context and pkg_dict, unchanged, as a tuple.
    """
    unchanged = (context, pkg_dict)
    return unchanged

after_load

after_load(service)

Control plugin loading mechanism

This method is implemented by the SaeossPlugin because we are adding a 1:1 relationship between our UserExtraFields model and CKAN's User model.

SQLAlchemy expects relationships to be configured on both sides, which means we have to modify CKAN's User model in order to make the relationship work. We do that in this function.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def after_load(self, service):
    """Attach the extra-fields relationship to CKAN's User model.

    SaeossPlugin adds a 1:1 relationship between the `UserExtraFields` model
    and CKAN's `User` model. SQLAlchemy expects relationships to be configured
    on both sides, so the `User` model is modified here to declare its side of
    the relationship.

    """

    user_side_relationship = orm.relationship(
        UserExtraFields, back_populates="user", uselist=False
    )
    model.User.extra_fields = user_side_relationship

after_search

after_search(search_results, search_params)

IPackageController interface requires reimplementation of this method.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def after_search(self, search_results, search_params):
    """Replace the facets of a search result (IPackageController hook).

    A separate search query is run with an empty filter query (restricted to
    public datasets when there is no logged-in user), and the facets it
    returns are stored in ``search_results["search_facets"]``.

    :param search_results: the search result dict; its ``"search_facets"``
        entry is overwritten.
    :param search_params: not read by this method.
    :returns: the same ``search_results`` object.
    """

    context = {}
    logger.debug(f"after search {context}")

    # Collect the facet fields: the configured ones first, then whatever the
    # IFacets plugins add or remove.
    facets = OrderedDict()
    default_facet_titles = {
        "groups": _("Groups"),
        "tags": _("Tags"),
    }
    for facet in h.facets():
        facets[facet] = default_facet_titles.get(facet, facet)

    for plugin in plugins.PluginImplementations(plugins.IFacets):
        facets = plugin.dataset_facets(facets, "dataset")

    data_dict = {
        "fq": "",
        "facet.field": list(facets.keys()),
    }

    # Anonymous visitors only get facet counts for public datasets.
    if not getattr(g, "user", None):
        data_dict["fq"] = "+capacity:public " + data_dict["fq"]

    query = search.query_for(model.Package)
    try:
        userobj = c.userobj
        if context.get("ignore_auth") or userobj is None or userobj.sysadmin:
            labels = None
        else:
            labels = lib_plugins.get_permission_labels().get_user_dataset_labels(
                userobj
            )

        query.run(data_dict, permission_labels=labels)
    except Exception:
        # Best effort: when the permission labels cannot be computed (or the
        # first attempt fails), retry without permission labels.
        logger.debug(
            "after_search: retrying facet query without permission labels",
            exc_info=True,
        )
        query.run(data_dict, permission_labels=None)

    facets = query.facets

    # Groups and organizations in the current search's facets.
    group_names = []
    for field_name in ("groups", "organization"):
        group_names.extend(facets.get(field_name, {}).keys())

    groups = (
        model.Session.query(model.Group.name, model.Group.title)
        .filter(model.Group.name.in_(group_names))
        .all()
        if group_names
        else []
    )
    group_titles_by_name = dict(groups)

    restructured_facets = {}
    for key, value in facets.items():
        items = []
        for key_, value_ in value.items():
            display_name = key_
            if key in ("groups", "organization"):
                # Show the group title, unless it is missing or blank.
                title = group_titles_by_name.get(key_, key_)
                if title and title.strip():
                    display_name = title
            items.append(
                {"name": key_, "display_name": display_name, "count": value_}
            )
        restructured_facets[key] = {"title": key, "items": items}
    search_results["search_facets"] = restructured_facets

    return search_results

after_show

after_show(context, pkg_dict)

IPackageController interface requires reimplementation of this method.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def after_show(self, context, pkg_dict):
    """No-op hook kept to satisfy the IPackageController interface.

    Returns the received context and pkg_dict, unchanged, as a tuple.
    """
    unchanged = (context, pkg_dict)
    return unchanged

after_unload

after_unload(service)

IPluginObserver interface requires reimplementation of this method.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def after_unload(self, service):
    """No-op hook kept to satisfy the IPluginObserver interface."""
    return None

after_update

after_update(context, pkg_dict)

IPackageController interface requires reimplementation of this method.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def after_update(self, context, pkg_dict):
    """No-op hook kept to satisfy the IPackageController interface.

    Returns the received context and pkg_dict, unchanged, as a tuple.
    """
    unchanged = (context, pkg_dict)
    return unchanged

before_create

before_create(context, resource)

Extensions will receive this before a resource is created.

Parameters:

Name Type Description Default
context dictionary

The context object of the current request, this includes for example access to the model and the user.

required
resource dictionary

An object representing the resource to be added to the dataset (the one that is about to be created).

required
Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def before_create(self, context, resource):
    """
    Hook called before a resource is created; it only logs the resource.

    :param context: The context object of the current request, this
        includes for example access to the ``model`` and the ``user``.
        Not read by this method.
    :type context: dictionary
    :param resource: An object representing the resource to be added
        to the dataset (the one that is about to be created).
    :type resource: dictionary
    """
    message = f"resource create {resource}"
    logger.debug(message)

before_delete

before_delete(mapper, connection, instance)

Receive an object instance before that instance is PURGEd. (whereas usually in ckan 'delete' means to change the state property to deleted, so use before_update for that case.)

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def before_delete(self, mapper, connection, instance):
    """
    No-op hook, called before an object instance is PURGEd.

    (In ckan 'delete' usually means changing the state property to
    deleted; use before_update for that case.)
    """
    return None

before_index

before_index(pkg_dict)

IPackageController interface requires reimplementation of this method.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def before_index(self, pkg_dict):
    """No-op hook kept to satisfy the IPackageController interface.

    The received pkg_dict is returned as is.
    """
    return pkg_dict

before_load

before_load(plugin_class)

IPluginObserver interface requires reimplementation of this method.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def before_load(self, plugin_class):
    """No-op hook kept to satisfy the IPluginObserver interface."""
    return None

before_show

before_show(resource_dict)

Extensions will receive the validated data dict before the resource is ready for display.

Be aware that this method is not only called for UI display, but also in other methods, like when a resource is deleted, because package_show is used to get access to the resources in a dataset.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def before_show(self, resource_dict):
    """
    Hook receiving the validated data dict before the resource is ready
    for display; the dict is returned as is.

    Note that this method is called not only for UI display but also in
    other situations, e.g. when a resource is deleted, because
    package_show is used to get access to the resources in a dataset.
    """
    return resource_dict

before_unload

before_unload(plugin_class)

IPluginObserver interface requires reimplementation of this method.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def before_unload(self, plugin_class):
    """No-op hook kept to satisfy the IPluginObserver interface."""
    return None

before_update

before_update(mapper, connection, instance)

Receive an object instance before that instance is UPDATEd.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def before_update(self, mapper, connection, instance):
    """
    Hook called before an object instance is UPDATEd; it only logs the
    instance.
    """
    message = f"resource update {instance}"
    logger.debug(message)

create

create(entity)

IPackageController interface requires reimplementation of this method.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def create(self, entity):
    """No-op hook kept to satisfy the IPackageController interface.

    The received entity is returned as is.
    """
    return entity

delete

delete(entity)

IPackageController interface requires reimplementation of this method.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def delete(self, entity):
    """No-op hook kept to satisfy the IPackageController interface.

    The received entity is returned as is.
    """
    return entity

edit

edit(entity)

IPackageController interface requires reimplementation of this method.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def edit(self, entity):
    """No-op hook kept to satisfy the IPackageController interface.

    The received entity is returned as is.
    """
    return entity

group_facets

group_facets(facets_dict, group_type, package_type)

IFacets interface requires reimplementation of all facets-related methods

In this case we do not really need to override this method, but need to satisfy IFacets.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def group_facets(
    self, facets_dict: typing.OrderedDict, group_type: str, package_type: str
) -> typing.OrderedDict:
    """Return the group facets untouched.

    IFacets requires every facets-related method to be implemented. No
    customization is needed for groups, so the received facets_dict is
    handed back as is.

    """

    return facets_dict

organization_facets

organization_facets(facets_dict, group_type, package_type)

IFacets interface requires reimplementation of all facets-related methods

In this case we do not really need to override this method, but need to satisfy IFacets.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def organization_facets(
    self, facets_dict: typing.OrderedDict, group_type: str, package_type: str
) -> typing.OrderedDict:
    """Return the organization facets untouched.

    IFacets requires every facets-related method to be implemented. No
    customization is needed for organizations, so the received facets_dict
    is handed back as is.

    """

    return facets_dict

read

read(entity)

IPackageController interface requires reimplementation of this method.

Source code in ckanext/saeoss/plugins/saeoss_plugin.py
def read(self, entity):
    """No-op hook kept to satisfy the IPackageController interface.

    The received entity is returned as is.
    """
    return entity

This file holds modules used by the emc_dcpr plugin for different functionalities. These modules are different from helper functions, as they aren't used by the UI.

handle_search

handle_search(search_params)

We combine search params with the AND operator when they are from the same category (e.g. two different organizations), and use the OR operator for different categories.

Source code in ckanext/saeoss/plugins/utils.py
def handle_search(search_params):
    """
    Join the terms of the ``fq`` search param with the OR operator.

    The ``fq`` string is split into terms with ``skip_brackets`` (spaces
    inside brackets or double quotes do not split a term). When there are
    at least two terms, every term after the first one is prefixed with
    " OR ", the result is stored back in ``search_params["fq"]`` and
    returned.

    ``fq`` is returned unchanged when there are fewer than two terms, or
    when ``skip_brackets`` could not split it (it then returns the raw
    string instead of a list, e.g. for unbalanced brackets).

    :param search_params: dict holding the ``"fq"`` string; it is updated
        in place when the terms are joined.
    :returns: the resulting ``fq`` string.
    """
    original_fq = search_params["fq"]
    # lstrip removes leading spaces
    fq_list = skip_brackets(original_fq.lstrip())

    # A non-list result is the raw string; iterating over it would rebuild
    # fq character by character.
    if not isinstance(fq_list, list):
        return original_fq

    # Consecutive or trailing spaces produce empty terms, which would end up
    # as dangling OR operators.
    terms = [item for item in fq_list if item]
    if len(terms) <= 1:
        return original_fq

    first_term, *other_terms = terms
    search_params["fq"] = " ".join(
        [first_term, *(" OR " + term + " " for term in other_terms)]
    )

    return search_params["fq"]

skip_brackets

skip_brackets(search_param)

Split the search param while skipping the spaces between brackets and between double quotes (e.g. the sasdi theme "Administrative boundaries 1").

Source code in ckanext/saeoss/plugins/utils.py
def skip_brackets(search_param: str):
    """
    Split the search param on spaces, ignoring the spaces found
    between brackets and between double quotes (e.g. the sasdi
    theme "Administrative boundaries 1").

    Returns the list of pieces, or the search param itself (a
    string) when the brackets do not match up.
    """
    opening, closing, space = "[", "]", " "
    depth = 0
    inside_quotes = False
    cut_points = [0]

    for position, symbol in enumerate(search_param):
        if symbol == opening:
            depth += 1
            continue
        if symbol == closing:
            depth -= 1
            continue
        # a closing bracket was seen before its opening one
        if depth < 0:
            return search_param
        if symbol == '"':
            inside_quotes = not inside_quotes
            continue
        if symbol == space and depth == 0 and not inside_quotes:
            cut_points.append(position)

    # the end of the string closes the last piece
    cut_points.append(len(search_param))

    # some bracket was never closed
    if depth > 0:
        return search_param

    pieces = []
    for start, stop in zip(cut_points, cut_points[1:]):
        pieces.append(search_param[start:stop].strip(space))
    return pieces