go_utils.download

  1import logging
  2
  3import pandas as pd
  4import requests
  5
  6import go_utils.lc as lc
  7import go_utils.mhm as mhm
  8from go_utils.constants import (
  9    end_date,
 10    landcover_protocol,
 11    mosquito_protocol,
 12    start_date,
 13)
 14
 15
 16def parse_api_data(response_json):
 17    try:
 18        results = response_json["results"]
 19        df = pd.DataFrame(results)
 20    except KeyError:
 21        raise RuntimeError("Data Download Failed. The GLOBE API is most likely down.")
 22
 23    # Expand the 'data' column by listing the contents and passing as a new dataframe
 24    df = pd.concat([df, pd.DataFrame(list(df["data"]))], axis=1)
 25    # Drop the previously nested data column
 26    df = df.drop(labels="data", axis=1)
 27
 28    # Display the dataframe
 29    return df
 30
 31
 32def is_valid_latlon_box(latlon_box):
 33
 34    valid_lat_checks = (
 35        latlon_box["min_lat"] < latlon_box["max_lat"]
 36        and latlon_box["max_lat"] <= 90
 37        and latlon_box["min_lat"] >= -90
 38    )
 39    valid_lon_checks = (
 40        latlon_box["min_lon"] < latlon_box["max_lon"]
 41        and latlon_box["max_lon"] <= 180
 42        and latlon_box["min_lon"] >= -180
 43    )
 44
 45    return valid_lon_checks and valid_lat_checks
 46
 47
 48def get_api_data(
 49    protocol,
 50    start_date=start_date,
 51    end_date=end_date,
 52    is_clean=True,
 53    latlon_box={"min_lat": -90, "max_lat": 90, "min_lon": -180, "max_lon": 180},
 54):
 55    """Utility function for interfacing with the GLOBE API.
 56    More information about the API can be viewed [here](https://www.globe.gov/es/globe-data/globe-api).
 57
 58    Parameters
 59    ----------
 60    protocol : str
 61               The desired GLOBE Observer Protocol. Protocols for the App protocols include: `land_covers` (Landcover), `mosquito_habitat_mapper` (Mosquito Habitat Mapper), `sky_conditions` (Clouds), `tree_heights` (Trees).
 62    start_date : str, default= 2017-05-31
 63                 The desired start date of the dataset in the format of (YYYY-MM-DD).
 64    end_date : str, default= today's date in YYYY-MM-DD form.
 65               The desired end date of the dataset in the format of (YYYY-MM-DD).
 66    latlon_box : dict of {str, double}, optional
 67                 The longitudes and latitudes of a bounding box for the dataset. The minimum/maximum latitudes and longitudes must be specified with the following keys: "min_lat", "min_lon", "max_lat", "max_lon". The default value specifies all latitude and longitude coordinates.
 68
 69    Returns
 70    -------
 71    pd.DataFrame
 72      A DataFrame containing Raw GLOBE Observer Data of the specified parameters
 73    """
 74
 75    if is_valid_latlon_box(latlon_box):
 76        url = f"https://api.globe.gov/search/v1/measurement/protocol/measureddate/lat/lon/?protocols={protocol}&startdate={start_date}&enddate={end_date}&minlat={str(latlon_box['min_lat'])}&maxlat={str(latlon_box['max_lat'])}&minlon={str(latlon_box['min_lon'])}&maxlon={str(latlon_box['max_lon'])}&geojson=FALSE&sample=FALSE"
 77    else:
 78        logging.warning(
 79            "You did not enter any valid/specific coordinates, so we gave you all the observations for your protocol, date_range, and any countryNames you may have specified.\n"
 80        )
 81        url = f"https://api.globe.gov/search/v1/measurement/protocol/measureddate/?protocols={protocol}&startdate={start_date}&enddate={end_date}&geojson=FALSE&sample=FALSE"
 82
 83    # Downloads data from the GLOBE API
 84    response = requests.get(url)
 85
 86    if not response:
 87        raise RuntimeError(
 88            "Failed to get data from the API. Double check your specified settings to make sure they are valid."
 89        )
 90
 91    # Convert measured date data into datetime
 92    df = parse_api_data(response.json())
 93    convert_dates_to_datetime(df)
 94
 95    if is_clean:
 96        df = default_data_clean(df, protocol)
 97    return df
 98
 99
def convert_dates_to_datetime(df):
    """Convert date-like columns of a DataFrame to datetimes in place.

    A column is treated as date-like when its name contains "Date" or
    "MeasuredAt". Values that cannot be parsed are coerced to NaT.

    Parameters
    ----------
    df : pd.DataFrame
         The DataFrame to modify; the matching columns are overwritten.
    """
    markers = ("Date", "MeasuredAt")
    # Resolve the full list of targets first, before any column is rewritten.
    targets = [name for name in df.columns if any(tag in name for tag in markers)]
    for name in targets:
        df[name] = pd.to_datetime(df[name], errors="coerce")
104
105
def default_data_clean(df, protocol):
    """Run the protocol-specific cleanup and flagging steps on a DataFrame.

    Parameters
    ----------
    df : pd.DataFrame
         The data to clean.
    protocol : str
               The protocol the data belongs to. Only the mosquito and
               landcover protocols have a cleanup module registered here.

    Returns
    -------
    pd.DataFrame
      The result of the protocol module's `apply_cleanup` followed by its
      `add_flags`. For any other protocol a warning is logged and `df` is
      returned as it was passed in.
    """
    cleaners = {mosquito_protocol: mhm, landcover_protocol: lc}
    cleaner = cleaners.get(protocol)

    if cleaner is None:
        logging.warning("The protocol you entered is not supported for cleanup.")
        return df

    cleaned = cleaner.apply_cleanup(df)
    return cleaner.add_flags(cleaned)
def parse_api_data(response_json)
def parse_api_data(response_json):
    """Convert a decoded GLOBE API JSON response into a flat DataFrame.

    Each entry of ``response_json["results"]`` becomes one row, and the
    mapping stored under each entry's ``data`` key is expanded into
    top-level columns that are appended after the existing ones.

    Parameters
    ----------
    response_json : dict
                    The decoded JSON body of a GLOBE API response.

    Returns
    -------
    pd.DataFrame
      One row per result with the nested ``data`` column flattened. If the
      results contain no ``data`` column (for example, an empty result set),
      the frame is returned without any expansion.

    Raises
    ------
    RuntimeError
      If the response does not contain a ``results`` key.
    """
    # Only the key lookup is guarded so unrelated errors are not relabelled.
    try:
        results = response_json["results"]
    except KeyError as err:
        raise RuntimeError(
            "Data Download Failed. The GLOBE API is most likely down."
        ) from err

    df = pd.DataFrame(results)

    # An empty result set has no "data" column; indexing it would raise a
    # confusing KeyError, so hand back the frame as-is.
    if "data" not in df.columns:
        return df

    # Expand the nested 'data' dictionaries into their own columns, then
    # replace the nested column with the expanded ones.
    nested = pd.DataFrame(df["data"].tolist(), index=df.index)
    return pd.concat([df.drop(columns="data"), nested], axis=1)
def is_valid_latlon_box(latlon_box)
def is_valid_latlon_box(latlon_box):
    """Report whether a bounding box is well-ordered and within range.

    Parameters
    ----------
    latlon_box : dict
                 Mapping with the keys "min_lat", "max_lat", "min_lon" and
                 "max_lon".

    Returns
    -------
    bool
      True when the minimum latitude is strictly below the maximum and both
      lie within [-90, 90], and the minimum longitude is strictly below the
      maximum and both lie within [-180, 180]; False otherwise.
    """
    south, north = latlon_box["min_lat"], latlon_box["max_lat"]
    lat_ok = -90 <= south < north <= 90

    west, east = latlon_box["min_lon"], latlon_box["max_lon"]
    lon_ok = -180 <= west < east <= 180

    return lon_ok and lat_ok
def get_api_data( protocol, start_date='2017-05-31', end_date='2022-08-05', is_clean=True, latlon_box={'min_lat': -90, 'max_lat': 90, 'min_lon': -180, 'max_lon': 180})
def get_api_data(
    protocol,
    start_date=start_date,
    end_date=end_date,
    is_clean=True,
    latlon_box={"min_lat": -90, "max_lat": 90, "min_lon": -180, "max_lon": 180},
):
    """Utility function for interfacing with the GLOBE API.
    More information about the API can be viewed [here](https://www.globe.gov/es/globe-data/globe-api).

    Parameters
    ----------
    protocol : str
               The desired GLOBE Observer protocol. App protocols include: `land_covers` (Landcover), `mosquito_habitat_mapper` (Mosquito Habitat Mapper), `sky_conditions` (Clouds), `tree_heights` (Trees).
    start_date : str, default= 2017-05-31
                 The desired start date of the dataset in the format of (YYYY-MM-DD).
    end_date : str, default= today's date in YYYY-MM-DD form.
               The desired end date of the dataset in the format of (YYYY-MM-DD).
    is_clean : bool, default= True
               If True, the downloaded data is passed through `default_data_clean` for the given protocol before being returned. If False, the parsed data is returned without that step.
    latlon_box : dict of {str, double}, optional
                 The longitudes and latitudes of a bounding box for the dataset. The minimum/maximum latitudes and longitudes must be specified with the following keys: "min_lat", "min_lon", "max_lat", "max_lon". The default value specifies all latitude and longitude coordinates. If the box is not valid, a warning is logged and the request is made without any coordinate filter.

    Returns
    -------
    pd.DataFrame
      A DataFrame containing GLOBE Observer data for the specified parameters, with date columns converted to datetimes. The data is cleaned when `is_clean` is True.

    Raises
    ------
    RuntimeError
      If the API response is not successful, or if the response body cannot be parsed into results.
    """
    # NOTE: the default latlon_box is a shared dict; it is only read here,
    # never mutated.
    base_url = "https://api.globe.gov/search/v1/measurement/protocol/measureddate/"

    # Query parameters are handed to requests so each value is URL-encoded,
    # rather than being spliced unescaped into the URL string.
    params = {
        "protocols": protocol,
        "startdate": start_date,
        "enddate": end_date,
    }

    if is_valid_latlon_box(latlon_box):
        url = base_url + "lat/lon/"
        params.update(
            minlat=str(latlon_box["min_lat"]),
            maxlat=str(latlon_box["max_lat"]),
            minlon=str(latlon_box["min_lon"]),
            maxlon=str(latlon_box["max_lon"]),
        )
    else:
        logging.warning(
            "You did not enter any valid/specific coordinates, so we gave you all the observations for your protocol, date_range, and any countryNames you may have specified.\n"
        )
        url = base_url

    params["geojson"] = "FALSE"
    params["sample"] = "FALSE"

    # Downloads data from the GLOBE API
    response = requests.get(url, params=params)

    # A Response is falsy when its status code signals an error.
    if not response:
        raise RuntimeError(
            "Failed to get data from the API. Double check your specified settings to make sure they are valid."
        )

    # Flatten the response, then convert date columns into datetimes in place
    df = parse_api_data(response.json())
    convert_dates_to_datetime(df)

    if is_clean:
        df = default_data_clean(df, protocol)
    return df

Utility function for interfacing with the GLOBE API. More information about the API can be viewed at https://www.globe.gov/es/globe-data/globe-api.

Parameters
  • protocol (str): The desired GLOBE Observer protocol. App protocols include: land_covers (Landcover), mosquito_habitat_mapper (Mosquito Habitat Mapper), sky_conditions (Clouds), tree_heights (Trees).
  • start_date (str, default= 2017-05-31): The desired start date of the dataset in the format of (YYYY-MM-DD).
  • end_date (str, default= today's date in YYYY-MM-DD form.): The desired end date of the dataset in the format of (YYYY-MM-DD).
  • latlon_box (dict of {str, double}, optional): The longitudes and latitudes of a bounding box for the dataset. The minimum/maximum latitudes and longitudes must be specified with the following keys: "min_lat", "min_lon", "max_lat", "max_lon". The default value specifies all latitude and longitude coordinates.
Returns
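  • pd.DataFrame: A DataFrame containing GLOBE Observer data for the specified parameters. The data is passed through the protocol's default cleanup unless is_clean is False, in which case it is returned uncleaned.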
def convert_dates_to_datetime(df)
def convert_dates_to_datetime(df):
    """Convert date-like columns of a DataFrame to datetimes in place.

    A column is treated as date-like when its name contains "Date" or
    "MeasuredAt". Values that cannot be parsed are coerced to NaT.

    Parameters
    ----------
    df : pd.DataFrame
         The DataFrame to modify; the matching columns are overwritten.
    """
    markers = ("Date", "MeasuredAt")
    # Resolve the full list of targets first, before any column is rewritten.
    targets = [name for name in df.columns if any(tag in name for tag in markers)]
    for name in targets:
        df[name] = pd.to_datetime(df[name], errors="coerce")
def default_data_clean(df, protocol)
def default_data_clean(df, protocol):
    """Run the protocol-specific cleanup and flagging steps on a DataFrame.

    Parameters
    ----------
    df : pd.DataFrame
         The data to clean.
    protocol : str
               The protocol the data belongs to. Only the mosquito and
               landcover protocols have a cleanup module registered here.

    Returns
    -------
    pd.DataFrame
      The result of the protocol module's `apply_cleanup` followed by its
      `add_flags`. For any other protocol a warning is logged and `df` is
      returned as it was passed in.
    """
    cleaners = {mosquito_protocol: mhm, landcover_protocol: lc}
    cleaner = cleaners.get(protocol)

    if cleaner is None:
        logging.warning("The protocol you entered is not supported for cleanup.")
        return df

    cleaned = cleaner.apply_cleanup(df)
    return cleaner.add_flags(cleaned)