go_utils.geoenrich
from datetime import datetime

import numpy as np
from arcgis.features import GeoAccessor
from arcgis.gis import GIS

from go_utils.constants import (
    abbreviation_dict,
    end_date,
    landcover_protocol,
    mosquito_protocol,
    region_dict,
    start_date,
)
from go_utils.download import convert_dates_to_datetime, default_data_clean


def get_country_api_data(
    protocol,
    start_date=start_date,
    end_date=end_date,
    is_clean=True,
    countries=[],
    regions=[],
):
    """
    Gets country enriched API Data. Do note that this data comes from layers in ArcGIS that are updated daily. Therefore, there will be some delay between when an entry is uploaded onto the GLOBE database and when it appears in the ArcGIS dataset.

    Parameters
    ----------
    protocol : str, {"mosquito_habitat_mapper", "land_covers"}
        The desired GLOBE Observer Protocol. Currently only mosquito habitat mapper and land cover are supported.
    start_date : str, default= 2017-05-31
        The desired start date of the dataset in the format of (YYYY-MM-DD).
    end_date : str, default= today's date in YYYY-MM-DD form.
        The desired end date of the dataset in the format of (YYYY-MM-DD).
    is_clean : bool, default=True
        If True, the date-filtered data is passed through `default_data_clean` for the given protocol before any country filtering.
    countries : list of str, default=[]
        The list of desired countries. Look at go_utils.info.region_dict to see supported country names. If the list is empty, all data will be included. The list is never modified by this function.
    regions : list of str, default=[]
        The list of desired regions. Look at go_utils.info.region_dict to see supported region names and the countries they enclose. If the list is empty, all data will be included.

    Returns
    -------
    pd.DataFrame
        The layer's data restricted to the requested date range (inclusive on both ends) and, when any countries or regions are given, to rows whose country is in the requested set.

    Raises
    ------
    ValueError
        If `protocol` is not one of the supported protocols.
    """

    item_id_dict = {
        mosquito_protocol: "a018521fbc3f42bc848d3fa4c52e02ce",
        landcover_protocol: "fe54b831415f44d2b1640327ae276fb8",
    }

    if protocol not in item_id_dict:
        raise ValueError(
            "Invalid protocol, currently only 'mosquito_habitat_mapper' and 'land_covers' are supported."
        )

    gis = GIS()
    item = gis.content.get(itemid=item_id_dict[protocol])
    df = GeoAccessor.from_layer(item.layers[0])

    if "SHAPE" in df:
        df.drop(["SHAPE"], axis=1, inplace=True)

    # Due to the size of the mhm column names, ArcGIS truncates the names so it must be renamed in this step.
    if protocol == "mosquito_habitat_mapper":
        mhm_rename_dict = {
            col: f"mosquitohabitatmapper{col}"
            for col in df.columns
            if col[0].isupper() and col != "COUNTRY"
        }
        df.rename(
            mhm_rename_dict,
            axis=1,
            inplace=True,
        )

    # Filter the dates
    start = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d")
    measured_at = protocol.replace("_", "") + "MeasuredAt"

    convert_dates_to_datetime(df)

    df = df[(df[measured_at] >= start) & (df[measured_at] <= end)]

    if is_clean:
        df = default_data_clean(df, protocol)

    # Collect the requested countries in a fresh set. Extending `countries`
    # in place would modify the caller's list and, worse, the shared default
    # list, making a region filter leak into every later call.
    countries_set = set(countries or ())
    for region in regions or ():
        # NOTE(review): an unknown region name presumably raises KeyError here.
        countries_set.update(region_dict[region])

    # Return the regular data if nothing is specified
    if not countries_set:
        return df

    mask = _get_valid_countries_mask(df, protocol, countries_set)
    return df[mask]


def _get_valid_countries_mask(df, protocol, country_list):
    """
    Builds a boolean mask marking the rows whose country is in `country_list`.

    Parameters
    ----------
    df : pd.DataFrame
        The data to filter. It must contain the "<abbreviation>_COUNTRY" column for the protocol.
    protocol : str
        The protocol name, used to look up the column prefix in `abbreviation_dict`.
    country_list : set or list of str
        The country names to keep.

    Returns
    -------
    np.ndarray
        A boolean array with one entry per row of `df`.
    """
    country_col = df[f"{abbreviation_dict[protocol]}_COUNTRY"]
    # `isin` also works on an empty column, whereas `np.vectorize` without
    # `otypes` raises ValueError when called on a size-0 input.
    return country_col.isin(country_list).to_numpy()
def
get_country_api_data( protocol, start_date='2017-05-31', end_date='2022-08-05', is_clean=True, countries=[], regions=[])
def get_country_api_data(
    protocol,
    start_date=start_date,
    end_date=end_date,
    is_clean=True,
    countries=[],
    regions=[],
):
    """
    Gets country enriched API Data. Do note that this data comes from layers in ArcGIS that are updated daily. Therefore, there will be some delay between when an entry is uploaded onto the GLOBE database and when it appears in the ArcGIS dataset.

    Parameters
    ----------
    protocol : str, {"mosquito_habitat_mapper", "land_covers"}
        The desired GLOBE Observer Protocol. Currently only mosquito habitat mapper and land cover are supported.
    start_date : str, default= 2017-05-31
        The desired start date of the dataset in the format of (YYYY-MM-DD).
    end_date : str, default= today's date in YYYY-MM-DD form.
        The desired end date of the dataset in the format of (YYYY-MM-DD).
    is_clean : bool, default=True
        If True, the date-filtered data is passed through `default_data_clean` for the given protocol before any country filtering.
    countries : list of str, default=[]
        The list of desired countries. Look at go_utils.info.region_dict to see supported country names. If the list is empty, all data will be included. The list is never modified by this function.
    regions : list of str, default=[]
        The list of desired regions. Look at go_utils.info.region_dict to see supported region names and the countries they enclose. If the list is empty, all data will be included.

    Returns
    -------
    pd.DataFrame
        The layer's data restricted to the requested date range (inclusive on both ends) and, when any countries or regions are given, to rows whose country is in the requested set.

    Raises
    ------
    ValueError
        If `protocol` is not one of the supported protocols.
    """

    item_id_dict = {
        mosquito_protocol: "a018521fbc3f42bc848d3fa4c52e02ce",
        landcover_protocol: "fe54b831415f44d2b1640327ae276fb8",
    }

    if protocol not in item_id_dict:
        raise ValueError(
            "Invalid protocol, currently only 'mosquito_habitat_mapper' and 'land_covers' are supported."
        )

    gis = GIS()
    item = gis.content.get(itemid=item_id_dict[protocol])
    df = GeoAccessor.from_layer(item.layers[0])

    if "SHAPE" in df:
        df.drop(["SHAPE"], axis=1, inplace=True)

    # Due to the size of the mhm column names, ArcGIS truncates the names so it must be renamed in this step.
    if protocol == "mosquito_habitat_mapper":
        mhm_rename_dict = {
            col: f"mosquitohabitatmapper{col}"
            for col in df.columns
            if col[0].isupper() and col != "COUNTRY"
        }
        df.rename(
            mhm_rename_dict,
            axis=1,
            inplace=True,
        )

    # Filter the dates
    start = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d")
    measured_at = protocol.replace("_", "") + "MeasuredAt"

    convert_dates_to_datetime(df)

    df = df[(df[measured_at] >= start) & (df[measured_at] <= end)]

    if is_clean:
        df = default_data_clean(df, protocol)

    # Collect the requested countries in a fresh set. Extending `countries`
    # in place would modify the caller's list and, worse, the shared default
    # list, making a region filter leak into every later call.
    countries_set = set(countries or ())
    for region in regions or ():
        # NOTE(review): an unknown region name presumably raises KeyError here.
        countries_set.update(region_dict[region])

    # Return the regular data if nothing is specified
    if not countries_set:
        return df

    mask = _get_valid_countries_mask(df, protocol, countries_set)
    return df[mask]
Gets country enriched API Data. Do note that this data comes from layers in ArcGIS that are updated daily. Therefore, there will be some delay between when an entry is uploaded onto the GLOBE database and when it appears in the ArcGIS dataset.
Parameters
- protocol (str, {"mosquito_habitat_mapper", "land_covers"}): The desired GLOBE Observer Protocol. Currently only mosquito habitat mapper and land cover are supported.
- start_date (str, default= 2017-05-31): The desired start date of the dataset in the format of (YYYY-MM-DD).
- end_date (str, default= today's date in YYYY-MM-DD form.): The desired end date of the dataset in the format of (YYYY-MM-DD).
- countries (list of str, default=[]): The list of desired countries. Look at go_utils.info.region_dict to see supported country names. If the list is empty, all data will be included.
- regions (list of str, default=[]): The list of desired regions. Look at go_utils.info.region_dict to see supported region names and the countries they enclose. If the list is empty, all data will be included.
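- latlon_box (dict of {str, double}, optional): The longitudes and latitudes of a bounding box for the dataset. The minimum/maximum latitudes and longitudes must be specified with the following keys: "min_lat", "min_lon", "max_lat", "max_lon". The default value specifies all latitude and longitude coordinates. Note: the function signature shown above does not include a latlon_box parameter, so this entry does not correspond to an argument the function currently accepts.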