go_utils.download
import logging

import pandas as pd
import requests

import go_utils.lc as lc
import go_utils.mhm as mhm
from go_utils.constants import (
    end_date,
    landcover_protocol,
    mosquito_protocol,
    start_date,
)


def parse_api_data(response_json):
    """Convert a decoded GLOBE API JSON payload into a flat DataFrame.

    Parameters
    ----------
    response_json : dict
        The decoded JSON body of a GLOBE API response. The records are read from its "results" entry.

    Returns
    -------
    pd.DataFrame
        One row per record. When the records carry a nested "data" entry, its contents are expanded into columns of their own and the nested column is dropped. When there is no "data" column (for example, no records matched), the frame is returned unexpanded.

    Raises
    ------
    RuntimeError
        If the payload has no "results" entry.
    """
    try:
        results = response_json["results"]
    except KeyError as err:
        raise RuntimeError(
            "Data Download Failed. The GLOBE API is most likely down."
        ) from err

    df = pd.DataFrame(results)

    # An empty result set produces a frame without a "data" column; there is
    # nothing to expand, so return it instead of failing on the lookup below.
    if "data" not in df.columns:
        return df

    # Expand the 'data' column by listing the contents and passing as a new dataframe
    df = pd.concat([df, pd.DataFrame(list(df["data"]))], axis=1)
    # Drop the previously nested data column
    return df.drop(labels="data", axis=1)


def is_valid_latlon_box(latlon_box):
    """Check whether a bounding box describes a valid, non-empty region.

    Parameters
    ----------
    latlon_box : dict of {str, double}
        Bounding box with the keys "min_lat", "max_lat", "min_lon" and "max_lon".

    Returns
    -------
    bool
        True when the minimum is strictly below the maximum on both axes, latitudes lie within [-90, 90] and longitudes lie within [-180, 180]. False otherwise, including when a key is missing or a value cannot be compared with a number.
    """
    try:
        min_lat, max_lat = latlon_box["min_lat"], latlon_box["max_lat"]
        min_lon, max_lon = latlon_box["min_lon"], latlon_box["max_lon"]
        valid_lat_checks = -90 <= min_lat < max_lat <= 90
        valid_lon_checks = -180 <= min_lon < max_lon <= 180
    except (KeyError, TypeError):
        # A malformed box is simply not valid; callers decide how to fall back.
        return False

    return bool(valid_lon_checks and valid_lat_checks)


def get_api_data(
    protocol,
    start_date=start_date,
    end_date=end_date,
    is_clean=True,
    latlon_box={"min_lat": -90, "max_lat": 90, "min_lon": -180, "max_lon": 180},
):
    """Utility function for interfacing with the GLOBE API.
    More information about the API can be viewed [here](https://www.globe.gov/es/globe-data/globe-api).

    Parameters
    ----------
    protocol : str
        The desired GLOBE Observer Protocol. Protocols for the App protocols include: `land_covers` (Landcover), `mosquito_habitat_mapper` (Mosquito Habitat Mapper), `sky_conditions` (Clouds), `tree_heights` (Trees).
    start_date : str, default= 2017-05-31
        The desired start date of the dataset in the format of (YYYY-MM-DD).
    end_date : str, default= today's date in YYYY-MM-DD form.
        The desired end date of the dataset in the format of (YYYY-MM-DD). The default is bound once, when this module is imported.
    is_clean : bool, default= True
        If True, the downloaded data is passed through `default_data_clean` for the given protocol before being returned.
    latlon_box : dict of {str, double}, optional
        The longitudes and latitudes of a bounding box for the dataset. The minimum/maximum latitudes and longitudes must be specified with the following keys: "min_lat", "min_lon", "max_lat", "max_lon". The default value specifies all latitude and longitude coordinates. An invalid box is ignored (with a warning) and all coordinates are requested.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing GLOBE Observer Data of the specified parameters, cleaned when `is_clean` is True.

    Raises
    ------
    RuntimeError
        If the API answers with an error status, or the response has no "results" entry.
    """
    url = "https://api.globe.gov/search/v1/measurement/protocol/measureddate/"
    # Query values are handed to requests as `params` so they are URL-encoded
    # instead of being pasted into the URL verbatim. Key order matches the
    # query string the API has always been sent.
    params = {"protocols": protocol, "startdate": start_date, "enddate": end_date}

    # The default `latlon_box` dict is only read, never mutated.
    if is_valid_latlon_box(latlon_box):
        url += "lat/lon/"
        params["minlat"] = latlon_box["min_lat"]
        params["maxlat"] = latlon_box["max_lat"]
        params["minlon"] = latlon_box["min_lon"]
        params["maxlon"] = latlon_box["max_lon"]
    else:
        logging.warning(
            "You did not enter any valid/specific coordinates, so we gave you all the observations for your protocol, date_range, and any countryNames you may have specified.\n"
        )
    params["geojson"] = "FALSE"
    params["sample"] = "FALSE"

    # Downloads data from the GLOBE API
    # NOTE(review): no timeout is set, so a stalled connection blocks forever.
    response = requests.get(url, params=params)

    if not response.ok:
        raise RuntimeError(
            "Failed to get data from the API. Double check your specified settings to make sure they are valid."
        )

    # Convert measured date data into datetime
    df = parse_api_data(response.json())
    convert_dates_to_datetime(df)

    if is_clean:
        df = default_data_clean(df, protocol)
    return df


def convert_dates_to_datetime(df):
    """Convert date-like columns of `df` to datetimes, in place.

    Every column whose name contains "Date" or "MeasuredAt" is converted with `pd.to_datetime`; values that cannot be parsed become NaT.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame to modify. Nothing is returned.
    """
    date_columns = [col for col in df.columns if "Date" in col or "MeasuredAt" in col]
    for column in date_columns:
        df[column] = pd.to_datetime(df[column], errors="coerce")


def default_data_clean(df, protocol):
    """Apply the protocol-specific cleanup and flagging to `df`.

    Parameters
    ----------
    df : pd.DataFrame
        The downloaded data.
    protocol : str
        The protocol the data belongs to. Only the mosquito and landcover protocols have a cleanup module.

    Returns
    -------
    pd.DataFrame
        The result of the module's `apply_cleanup` followed by `add_flags`, or `df` unchanged (with a warning) for any other protocol.
    """
    module_mapper = {mosquito_protocol: mhm, landcover_protocol: lc}
    if protocol in module_mapper:
        df = module_mapper[protocol].apply_cleanup(df)
        df = module_mapper[protocol].add_flags(df)
    else:
        logging.warning("The protocol you entered is not supported for cleanup.")

    return df
def
parse_api_data(response_json)
def parse_api_data(response_json):
    """Convert a decoded GLOBE API JSON payload into a flat DataFrame.

    Parameters
    ----------
    response_json : dict
        The decoded JSON body of a GLOBE API response. The records are read from its "results" entry.

    Returns
    -------
    pd.DataFrame
        One row per record. When the records carry a nested "data" entry, its contents are expanded into columns of their own and the nested column is dropped. When there is no "data" column (for example, no records matched), the frame is returned unexpanded.

    Raises
    ------
    RuntimeError
        If the payload has no "results" entry.
    """
    try:
        results = response_json["results"]
    except KeyError as err:
        raise RuntimeError(
            "Data Download Failed. The GLOBE API is most likely down."
        ) from err

    df = pd.DataFrame(results)

    # An empty result set produces a frame without a "data" column; there is
    # nothing to expand, so return it instead of failing on the lookup below.
    if "data" not in df.columns:
        return df

    # Expand the 'data' column by listing the contents and passing as a new dataframe
    df = pd.concat([df, pd.DataFrame(list(df["data"]))], axis=1)
    # Drop the previously nested data column
    return df.drop(labels="data", axis=1)
def
is_valid_latlon_box(latlon_box)
def is_valid_latlon_box(latlon_box):
    """Check whether a bounding box describes a valid, non-empty region.

    Parameters
    ----------
    latlon_box : dict of {str, double}
        Bounding box with the keys "min_lat", "max_lat", "min_lon" and "max_lon".

    Returns
    -------
    bool
        True when the minimum is strictly below the maximum on both axes, latitudes lie within [-90, 90] and longitudes lie within [-180, 180]. False otherwise, including when a key is missing or a value cannot be compared with a number.
    """
    try:
        min_lat, max_lat = latlon_box["min_lat"], latlon_box["max_lat"]
        min_lon, max_lon = latlon_box["min_lon"], latlon_box["max_lon"]
        valid_lat_checks = -90 <= min_lat < max_lat <= 90
        valid_lon_checks = -180 <= min_lon < max_lon <= 180
    except (KeyError, TypeError):
        # A malformed box is simply not valid; callers decide how to fall back.
        return False

    return bool(valid_lon_checks and valid_lat_checks)
def
get_api_data( protocol, start_date='2017-05-31', end_date='2022-08-05', is_clean=True, latlon_box={'min_lat': -90, 'max_lat': 90, 'min_lon': -180, 'max_lon': 180})
def get_api_data(
    protocol,
    start_date=start_date,
    end_date=end_date,
    is_clean=True,
    latlon_box={"min_lat": -90, "max_lat": 90, "min_lon": -180, "max_lon": 180},
):
    """Utility function for interfacing with the GLOBE API.
    More information about the API can be viewed [here](https://www.globe.gov/es/globe-data/globe-api).

    Parameters
    ----------
    protocol : str
        The desired GLOBE Observer Protocol. Protocols for the App protocols include: `land_covers` (Landcover), `mosquito_habitat_mapper` (Mosquito Habitat Mapper), `sky_conditions` (Clouds), `tree_heights` (Trees).
    start_date : str, default= 2017-05-31
        The desired start date of the dataset in the format of (YYYY-MM-DD).
    end_date : str, default= today's date in YYYY-MM-DD form.
        The desired end date of the dataset in the format of (YYYY-MM-DD). The default is bound once, when this module is imported.
    is_clean : bool, default= True
        If True, the downloaded data is passed through `default_data_clean` for the given protocol before being returned.
    latlon_box : dict of {str, double}, optional
        The longitudes and latitudes of a bounding box for the dataset. The minimum/maximum latitudes and longitudes must be specified with the following keys: "min_lat", "min_lon", "max_lat", "max_lon". The default value specifies all latitude and longitude coordinates. An invalid box is ignored (with a warning) and all coordinates are requested.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing GLOBE Observer Data of the specified parameters, cleaned when `is_clean` is True.

    Raises
    ------
    RuntimeError
        If the API answers with an error status, or the response has no "results" entry.
    """
    url = "https://api.globe.gov/search/v1/measurement/protocol/measureddate/"
    # Query values are handed to requests as `params` so they are URL-encoded
    # instead of being pasted into the URL verbatim. Key order matches the
    # query string the API has always been sent.
    params = {"protocols": protocol, "startdate": start_date, "enddate": end_date}

    # The default `latlon_box` dict is only read, never mutated.
    if is_valid_latlon_box(latlon_box):
        url += "lat/lon/"
        params["minlat"] = latlon_box["min_lat"]
        params["maxlat"] = latlon_box["max_lat"]
        params["minlon"] = latlon_box["min_lon"]
        params["maxlon"] = latlon_box["max_lon"]
    else:
        logging.warning(
            "You did not enter any valid/specific coordinates, so we gave you all the observations for your protocol, date_range, and any countryNames you may have specified.\n"
        )
    params["geojson"] = "FALSE"
    params["sample"] = "FALSE"

    # Downloads data from the GLOBE API
    # NOTE(review): no timeout is set, so a stalled connection blocks forever.
    response = requests.get(url, params=params)

    if not response.ok:
        raise RuntimeError(
            "Failed to get data from the API. Double check your specified settings to make sure they are valid."
        )

    # Convert measured date data into datetime
    df = parse_api_data(response.json())
    convert_dates_to_datetime(df)

    if is_clean:
        df = default_data_clean(df, protocol)
    return df
Utility function for interfacing with the GLOBE API. More information about the API can be viewed at https://www.globe.gov/es/globe-data/globe-api.
Parameters
- protocol (str): The desired GLOBE Observer Protocol. Supported app protocols include: land_covers (Landcover), mosquito_habitat_mapper (Mosquito Habitat Mapper), sky_conditions (Clouds), tree_heights (Trees).
- start_date (str, default= 2017-05-31): The desired start date of the dataset in the format of (YYYY-MM-DD).
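- end_date (str, default= today's date in YYYY-MM-DD form.): The desired end date of the dataset in the format of (YYYY-MM-DD).
- is_clean (bool, default= True): Whether the downloaded data is passed through the default cleanup for the protocol before being returned.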
- latlon_box (dict of {str, double}, optional): The longitudes and latitudes of a bounding box for the dataset. The minimum/maximum latitudes and longitudes must be specified with the following keys: "min_lat", "min_lon", "max_lat", "max_lon". The default value specifies all latitude and longitude coordinates.
Returns
- pd.DataFrame: A DataFrame containing Raw GLOBE Observer Data of the specified parameters
def
convert_dates_to_datetime(df)
def
default_data_clean(df, protocol)
def default_data_clean(df, protocol):
    """Apply the protocol-specific cleanup and flagging to `df`.

    Parameters
    ----------
    df : pd.DataFrame
        The downloaded data.
    protocol : str
        The protocol the data belongs to. Only the mosquito and landcover protocols have a cleanup module.

    Returns
    -------
    pd.DataFrame
        The result of the module's `apply_cleanup` followed by `add_flags`, or `df` unchanged (with a warning) for any other protocol.
    """
    cleaners = {mosquito_protocol: mhm, landcover_protocol: lc}

    # Guard clause: protocols without a cleanup module pass through untouched.
    if protocol not in cleaners:
        logging.warning("The protocol you entered is not supported for cleanup.")
        return df

    protocol_module = cleaners[protocol]
    cleaned = protocol_module.apply_cleanup(df)
    return protocol_module.add_flags(cleaned)