go_utils.geoenrich

  1from datetime import datetime
  2
  3import numpy as np
  4from arcgis.features import GeoAccessor
  5from arcgis.gis import GIS
  6
  7from go_utils.constants import (
  8    abbreviation_dict,
  9    end_date,
 10    landcover_protocol,
 11    mosquito_protocol,
 12    region_dict,
 13    start_date,
 14)
 15from go_utils.download import convert_dates_to_datetime, default_data_clean
 16
 17
 18def get_country_api_data(
 19    protocol,
 20    start_date=start_date,
 21    end_date=end_date,
 22    is_clean=True,
 23    countries=[],
 24    regions=[],
 25):
 26    """
 27    Gets country enriched API Data. Due note that this data comes from layers in ArcGIS that are updated daily. Therefore, there will be some delay between when an entry is uploaded onto the GLOBE data base and being on the ArcGIS dataset.
 28
 29    Parameters
 30    ----------
 31    protocol : str, {"mosquito_habitat_mapper", "land_covers"}
 32        The desired GLOBE Observer Protocol. Currently only mosquito habitat mapper and land cover is supported.
 33    start_date : str, default= 2017-05-31
 34        The desired start date of the dataset in the format of (YYYY-MM-DD).
 35    end_date : str, default= today's date in YYYY-MM-DD form.
 36        The desired end date of the dataset in the format of (YYYY-MM-DD).
 37    countries : list of str, default=[]
 38        The list of desired countries. Look at go_utils.info.region_dict to see supported country names. If the list is empty, all data will be included.
 39    regions : list of str, default=[]
 40        The list of desired regions. Look at go_utils.info.region_dict to see supported region names and the countries they enclose. If the list is empty, all data will be included.
 41    latlon_box : dict of {str, double}, optional
 42        The longitudes and latitudes of a bounding box for the dataset. The minimum/maximum latitudes and longitudes must be specified with the following keys: "min_lat", "min_lon", "max_lat", "max_lon". The default value specifies all latitude and longitude coordinates.
 43    """
 44
 45    item_id_dict = {
 46        mosquito_protocol: "a018521fbc3f42bc848d3fa4c52e02ce",
 47        landcover_protocol: "fe54b831415f44d2b1640327ae276fb8",
 48    }
 49
 50    if protocol not in item_id_dict:
 51        raise ValueError(
 52            "Invalid protocol, currently only 'mosquito_habitat_mapper' and 'land_covers' are supported."
 53        )
 54
 55    gis = GIS()
 56    item = gis.content.get(itemid=item_id_dict[protocol])
 57    df = GeoAccessor.from_layer(item.layers[0])
 58
 59    if "SHAPE" in df:
 60        df.drop(["SHAPE"], axis=1, inplace=True)
 61
 62    # Due to the size of the mhm column names, ArcGIS truncates the names so it must be renamed in this step.
 63    if protocol == "mosquito_habitat_mapper":
 64
 65        mhm_rename_dict = {
 66            col: f"mosquitohabitatmapper{col}"
 67            for col in df.columns
 68            if col[0].isupper() and col != "COUNTRY"
 69        }
 70        df.rename(
 71            mhm_rename_dict,
 72            axis=1,
 73            inplace=True,
 74        )
 75
 76    # Filter the dates
 77    start = datetime.strptime(start_date, "%Y-%m-%d")
 78    end = datetime.strptime(end_date, "%Y-%m-%d")
 79    measured_at = protocol.replace("_", "") + "MeasuredAt"
 80
 81    convert_dates_to_datetime(df)
 82
 83    df = df[(df[measured_at] >= start) & (df[measured_at] <= end)]
 84
 85    if is_clean:
 86        df = default_data_clean(df, protocol)
 87
 88    for region in regions:
 89        countries.extend(region_dict[region])
 90    countries_set = set(countries)
 91    # Return the regular data if nothing is specified
 92    if not countries_set:
 93        return df
 94    else:
 95        mask = _get_valid_countries_mask(df, protocol, countries_set)
 96        return df[mask]
 97
 98
 99def _get_valid_countries_mask(df, protocol, country_list):
100    country_filter = np.vectorize(lambda country_col: country_col in country_list)
101    mask = country_filter(df[f"{abbreviation_dict[protocol]}_COUNTRY"].to_numpy())
102    return mask
def get_country_api_data( protocol, start_date='2017-05-31', end_date='2022-08-05', is_clean=True, countries=[], regions=[])
def get_country_api_data(
    protocol,
    start_date=start_date,
    end_date=end_date,
    is_clean=True,
    countries=[],
    regions=[],
):
    """
    Gets country enriched API Data. Note that this data comes from layers in ArcGIS that are updated daily. Therefore, there will be some delay between when an entry is uploaded onto the GLOBE database and when it appears in the ArcGIS dataset.

    Parameters
    ----------
    protocol : str, {"mosquito_habitat_mapper", "land_covers"}
        The desired GLOBE Observer Protocol. Currently only mosquito habitat mapper and land cover are supported.
    start_date : str, default= 2017-05-31
        The desired start date of the dataset in the format of (YYYY-MM-DD).
    end_date : str, default= today's date in YYYY-MM-DD form.
        The desired end date of the dataset in the format of (YYYY-MM-DD).
    is_clean : bool, default=True
        If True, the data is passed through `default_data_clean` before the country filter is applied.
    countries : list of str, default=[]
        The list of desired countries. Look at region_dict (imported from go_utils.constants) to see supported country names. If both `countries` and `regions` are empty, all data will be included. This list is never modified.
    regions : list of str, default=[]
        The list of desired regions. Look at region_dict (imported from go_utils.constants) to see supported region names and the countries they enclose. If both `countries` and `regions` are empty, all data will be included.

    Returns
    -------
    The layer data as a DataFrame, restricted to the requested date range and, when any country or region is given, to rows whose country is in the requested set.

    Raises
    ------
    ValueError
        If `protocol` is not one of the supported protocols, or if a date string does not match YYYY-MM-DD.
    KeyError
        If an entry of `regions` is not a key of region_dict.
    """

    item_id_dict = {
        mosquito_protocol: "a018521fbc3f42bc848d3fa4c52e02ce",
        landcover_protocol: "fe54b831415f44d2b1640327ae276fb8",
    }

    if protocol not in item_id_dict:
        raise ValueError(
            "Invalid protocol, currently only 'mosquito_habitat_mapper' and 'land_covers' are supported."
        )

    gis = GIS()
    item = gis.content.get(itemid=item_id_dict[protocol])
    df = GeoAccessor.from_layer(item.layers[0])

    if "SHAPE" in df:
        df.drop(["SHAPE"], axis=1, inplace=True)

    # Due to the size of the mhm column names, ArcGIS truncates the names so it must be renamed in this step.
    if protocol == "mosquito_habitat_mapper":

        mhm_rename_dict = {
            col: f"mosquitohabitatmapper{col}"
            for col in df.columns
            if col[0].isupper() and col != "COUNTRY"
        }
        df.rename(
            mhm_rename_dict,
            axis=1,
            inplace=True,
        )

    # Filter the dates
    start = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d")
    measured_at = protocol.replace("_", "") + "MeasuredAt"

    convert_dates_to_datetime(df)

    df = df[(df[measured_at] >= start) & (df[measured_at] <= end)]

    if is_clean:
        df = default_data_clean(df, protocol)

    # Collect the requested countries in a fresh set. Extending `countries`
    # in place would mutate the caller's list and, worse, the shared default
    # list, so a region filter would silently persist into later calls.
    countries_set = set(countries)
    for region in regions:
        countries_set.update(region_dict[region])

    # Return the regular data if nothing is specified
    if not countries_set:
        return df

    mask = _get_valid_countries_mask(df, protocol, countries_set)
    return df[mask]

Gets country-enriched API data. Note that this data comes from layers in ArcGIS that are updated daily, so there will be some delay between when an entry is uploaded to the GLOBE database and when it appears in the ArcGIS dataset.

Parameters
  • protocol (str, {"mosquito_habitat_mapper", "land_covers"}): The desired GLOBE Observer Protocol. Currently only mosquito habitat mapper and land cover are supported.
  • start_date (str, default= 2017-05-31): The desired start date of the dataset in the format of (YYYY-MM-DD).
  • end_date (str, default= today's date in YYYY-MM-DD form.): The desired end date of the dataset in the format of (YYYY-MM-DD).
  • countries (list of str, default=[]): The list of desired countries. Look at go_utils.info.region_dict to see supported country names. If the list is empty, all data will be included.
  • regions (list of str, default=[]): The list of desired regions. Look at go_utils.info.region_dict to see supported region names and the countries they enclose. If the list is empty, all data will be included.
  • latlon_box (dict of {str, double}, optional): The longitudes and latitudes of a bounding box for the dataset. The minimum/maximum latitudes and longitudes must be specified with the following keys: "min_lat", "min_lon", "max_lat", "max_lon". The default value specifies all latitude and longitude coordinates. Note: the function signature shown above does not accept a latlon_box argument, so this parameter currently cannot be passed.