go_utils.photo_download

  1import os
  2import re
  3import warnings
  4
  5import numpy as np
  6import pandas as pd
  7import requests
  8from PIL import Image
  9
 10
 11def get_globe_photo_id(url: str):
 12    """
 13    Gets the GLOBE Photo ID from a url
 14
 15    Parameters
 16    ----------
 17    url : str
 18      A url to a GLOBE Observer Image
 19    """
 20    if pd.isna(url):
 21        return None
 22    else:
 23        match_obj = re.search(r"(?<=\d\d\d\d\/\d\d\/\d\d\/).*(?=\/)", url)
 24        if match_obj:
 25            photo_id = match_obj.group(0)
 26            return photo_id
 27    return None
 28
 29
 30def remove_bad_characters(filename: str):
 31    """
 32    Removes erroneous characters from filenames. This includes the `/` character as this is assuming that the filename is being passed, not a path that may include that symbol as part of a directory.
 33
 34    Parameters
 35    ----------
 36    filename : str
 37      A possible filename.
 38
 39    Returns
 40    -------
 41    str
 42        The filename without any erroneous characters
 43    """
 44    if pd.isna(filename):
 45        return None
 46    return re.sub(r"[<>:?\"/\\|*]", "", filename)
 47
 48
 49def download_photo(url: str, directory: str, filename: str, resolution=None):
 50    """
 51    Downloads a photo to a directory.
 52
 53    Parameters
 54    ----------
 55    url : str
 56        The URL to the photo
 57    directory : str
 58        The directory that the photo should be saved in
 59    filename : str
 60        The name of the photo
 61    resolution : tuple of int, default = None
 62        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.
 63    """
 64    if any(pd.isna(x) for x in [url, directory, filename]):
 65        msg = f"Either url ({url}), directory ({directory}), or filename ({filename}) was None."
 66        warnings.warn(msg)
 67    else:
 68        downloaded_obj = requests.get(url, allow_redirects=True)
 69        filename = remove_bad_characters(filename)
 70        out_path = os.path.join(directory, filename)
 71        if not os.path.exists(directory):
 72            os.mkdir(directory)
 73        if pd.isna(resolution):
 74            with open(out_path, "wb") as file:
 75                file.write(downloaded_obj.content)
 76        else:
 77            get_img_at_resolution(url, out_path, resolution)
 78
 79
 80def get_img_at_resolution(url, path, resolution):
 81    """
 82    Downloads an image from a url at a specified resolution
 83
 84    Parameters
 85    ----------
 86    url : str
 87        An image URL
 88    path : str
 89        The filepath to save the image to
 90    resolution : tuple of int
 91        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image.
 92    """
 93
 94    def get_img():
 95        with Image.open(requests.get(url, stream=True).raw) as img:
 96            img.resize(resolution).save(path)
 97
 98    try:
 99        get_img()
100    except Exception as e:  # Sometimes the image download fails and it has to be rerun
101        warnings.warn(f"{url} failed due to {repr(e)}, retrying...")
102        try:
103            get_img()
104            warnings.warn("retry successful")
105        except Exception as e:
106            warnings.warn(f"{url} failed: {repr(e)}")
107            return
108
109
def download_all_photos(targets):
    """
    Downloads all photos given a collection of targets which are tuples containing the url, directory, filename, and resolution.

    Targets that are not 4-element tuples are skipped with a warning.

    Parameters
    ----------
    targets : list of tuple of str
        Contains tuples that store the url, directory, filename, and resolution (will be None to get original photo resolution) of the desired photos to be downloaded in that order.
    """
    expected_num_params = 4
    # pd.isna on a list returns an element-wise array, and using that array in
    # an `if` raises ValueError. Only a scalar can mean "no targets given".
    if pd.api.types.is_scalar(targets) and pd.isna(targets):
        warnings.warn("Targets was none")
        return

    for target in targets:
        if isinstance(target, tuple) and len(target) == expected_num_params:
            download_photo(*target)
        else:
            warnings.warn(f"Target incorrectly formatted: {target}")
128
129
130def _format_param_name(name: str):
131    if pd.isna(name):
132        return None
133    return (
134        "".join(s.capitalize() + " " for s in name.split("_"))
135        .replace("Photo", "")
136        .strip()
137    )
138
139
140# Constructs Photo Name using given included fields and additional information
141def _build_photo_name(
142    protocol, photo_id, name_fields, include_in_name=[], additional_name_stem=""
143):
144    valid_protocols = ["lc_", "mhm_"]
145    if not protocol or protocol not in valid_protocols:
146        warnings.warn("Invalid protocol")
147        return None
148    name = protocol
149    if additional_name_stem and additional_name_stem != "":
150        name += f"{additional_name_stem}_"
151
152    if include_in_name:
153        for field in list(include_in_name):
154            if field in set(name_fields):
155                name += f"{name_fields[field]}_"
156
157    name += f"{photo_id}.png"
158    name = remove_bad_characters(name)
159    return name
160
161
162def _get_mosquito_classification(genus, species):
163    classification = genus
164    if pd.isna(classification):
165        classification = "None"
166    elif not pd.isna(species):
167        classification = f"{classification} {species}"
168    return classification
169
170
171def _warn_num_invalid_photos(num_invalid_photos: dict):
172    if sum(num_invalid_photos.values()) > 0:
173        msg = f"Skipped {sum(num_invalid_photos.values())} invalid photos: "
174        msg += str(num_invalid_photos)
175        warnings.warn(msg)
176
177
def get_mhm_download_targets(
    mhm_df,
    directory,
    latitude_col="mhm_Latitude",
    longitude_col="mhm_Longitude",
    watersource_col="mhm_WaterSource",
    date_col="mhm_measuredDate",
    id_col="mhm_MosquitoHabitatMapperId",
    genus_col="mhm_Genus",
    species_col="mhm_Species",
    larvae_photo="mhm_LarvaFullBodyPhotoUrls",
    watersource_photo="mhm_WaterSourcePhotoUrls",
    abdomen_photo="mhm_AbdomenCloseupPhotoUrls",
    include_in_name=[],
    additional_name_stem="",
    resolution=None,
):
    """
    Generates mosquito habitat mapper targets to download

    Every photo column may hold several urls per row separated by `;`. Urls
    that do not contain "https", or whose photo ID is missing or not a
    non-negative integer, are skipped; the number of skipped urls per reason
    is reported in a single warning.

    Parameters
    ----------
    mhm_df : pd.DataFrame
        Mosquito Habitat Mapper Data
    directory : str
        The directory to save the photos
    latitude_col : str, default="mhm_Latitude"
        The column name of the column that contains the Latitude
    longitude_col : str, default="mhm_Longitude"
        The column name of the column that contains the Longitude
    watersource_col : str, default = "mhm_WaterSource"
        The column name of the column that contains the watersource
    date_col : str, default = "mhm_measuredDate"
        The column name of the column that contains the measured date
    id_col : str, default = "mhm_MosquitoHabitatMapperId"
        The column name of the column that contains the mosquito habitat mapper id
    genus_col : str, default = "mhm_Genus"
        The column name of the column that contains the genus
    species_col : str, default = "mhm_Species"
        The column name of the column that contains the species
    larvae_photo : str, default = "mhm_LarvaFullBodyPhotoUrls"
        The column name of the column that contains the larvae photo urls. If not specified, the larvae photos will not be included.
    watersource_photo : str, default = "mhm_WaterSourcePhotoUrls"
        The column name of the column that contains the watersource photo urls. If not specified, the watersource photos will not be included.
    abdomen_photo : str, default = "mhm_AbdomenCloseupPhotoUrls"
        The column name of the column that contains the abdomen photo urls. If not specified, the abdomen photos will not be included.
    include_in_name : list of str, default=[]
        A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
        Accepted Included Names include:
            * `url_type` -- Type of photo (e.g. Watersource, Larvae, Abdomen)
            * `watersource` -- Watersource for the observed mosquito habitat
            * `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
            * `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
            * `date_str` -- Measured date formatted as YYYY-MM-DD
            * `mhm_id` -- Unique ID for a MHM observation
            * `classification` -- Mosquito classification (or `"None"` if no classification available)
    additional_name_stem : str, default=""
        Additional custom information the user can add to the name.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.

    Returns
    -------
    set of tuple
        Contains the (url, directory, filename, resolution) for each desired mosquito habitat mapper photo
    """
    targets = set()
    num_invalid_photos = {
        "invalid_URL": 0,
        "rejected": 0,
        "pending": 0,
        "bad_photo_id": 0,
    }

    def has_valid_photo_id(photo_id):
        # A non-numeric ID is counted as a bad photo ID instead of aborting
        # the whole run with a ValueError.
        if pd.isna(photo_id):
            return False
        try:
            return int(photo_id) >= 0
        except ValueError:
            return False

    def get_photo_args(
        url_entry,
        url_type,
        latitude,
        longitude,
        watersource,
        date,
        mhm_id,
        genus,
        species,
    ):
        if pd.isna(url_entry):
            return

        date_str = pd.to_datetime(str(date)).strftime("%Y-%m-%d")

        for url in url_entry.split(";"):
            if "https" in url:
                photo_id = get_globe_photo_id(url)

                name_fields = {
                    "url_type": url_type,
                    "watersource": watersource,
                    "latitude": round(latitude, 5),
                    "longitude": round(longitude, 5),
                    "date_str": date_str,
                    "mhm_id": mhm_id,
                    "classification": _get_mosquito_classification(genus, species),
                }

                if has_valid_photo_id(photo_id):
                    name = _build_photo_name(
                        "mhm_",
                        photo_id,
                        name_fields,
                        include_in_name,
                        additional_name_stem,
                    )
                    targets.add((url, directory, name, resolution))
                else:
                    num_invalid_photos["bad_photo_id"] += 1
            elif "rejected" in url:
                num_invalid_photos["rejected"] += 1
            elif "pending" in url:
                num_invalid_photos["pending"] += 1
            else:
                num_invalid_photos["invalid_URL"] += 1

    photo_locations = {
        "larvae_photo": larvae_photo,
        "watersource_photo": watersource_photo,
        "abdomen_photo": abdomen_photo,
    }
    for param_name, column_name in photo_locations.items():
        if not column_name:
            continue
        url_type = _format_param_name(param_name)
        # When no species column is given, every row gets an empty species.
        species_values = (
            mhm_df[species_col].tolist() if species_col else [""] * len(mhm_df)
        )
        # A plain row loop is used instead of np.vectorize: without `otypes`,
        # vectorize evaluates the function an extra time on the first row
        # (double-counting its invalid photos) and raises on an empty frame.
        rows = zip(
            mhm_df[column_name].tolist(),
            mhm_df[latitude_col].tolist(),
            mhm_df[longitude_col].tolist(),
            mhm_df[watersource_col].tolist(),
            mhm_df[date_col].tolist(),
            mhm_df[id_col].tolist(),
            mhm_df[genus_col].tolist(),
            species_values,
        )
        for (
            url_entry,
            latitude,
            longitude,
            watersource,
            date,
            mhm_id,
            genus,
            species,
        ) in rows:
            get_photo_args(
                url_entry,
                url_type,
                latitude,
                longitude,
                watersource,
                date,
                mhm_id,
                genus,
                species,
            )
    _warn_num_invalid_photos(num_invalid_photos)
    return targets
321
322
def download_mhm_photos(
    mhm_df,
    directory,
    latitude_col="mhm_Latitude",
    longitude_col="mhm_Longitude",
    watersource_col="mhm_WaterSource",
    date_col="mhm_measuredDate",
    id_col="mhm_MosquitoHabitatMapperId",
    genus_col="mhm_Genus",
    species_col="mhm_Species",
    larvae_photo="mhm_LarvaFullBodyPhotoUrls",
    watersource_photo="mhm_WaterSourcePhotoUrls",
    abdomen_photo="mhm_AbdomenCloseupPhotoUrls",
    include_in_name=[],
    additional_name_stem="",
    resolution=None,
):
    """
    Downloads mosquito habitat mapper photos

    Builds the download targets with `get_mhm_download_targets` using the
    exact same arguments and then downloads every target.

    Parameters
    ----------
    mhm_df : pd.DataFrame
        Mosquito Habitat Mapper Data
    directory : str
        The directory to save the photos
    latitude_col : str, default="mhm_Latitude"
        The column name of the column that contains the Latitude
    longitude_col : str, default="mhm_Longitude"
        The column name of the column that contains the Longitude
    watersource_col : str, default = "mhm_WaterSource"
        The column name of the column that contains the watersource
    date_col : str, default = "mhm_measuredDate"
        The column name of the column that contains the measured date
    id_col : str, default = "mhm_MosquitoHabitatMapperId"
        The column name of the column that contains the mosquito habitat mapper id
    genus_col : str, default = "mhm_Genus"
        The column name of the column that contains the genus
    species_col : str, default = "mhm_Species"
        The column name of the column that contains the species
    larvae_photo : str, default = "mhm_LarvaFullBodyPhotoUrls"
        The column name of the column that contains the larvae photo urls. If not specified, the larvae photos will not be included.
    watersource_photo : str, default = "mhm_WaterSourcePhotoUrls"
        The column name of the column that contains the watersource photo urls. If not specified, the watersource photos will not be included.
    abdomen_photo : str, default = "mhm_AbdomenCloseupPhotoUrls"
        The column name of the column that contains the abdomen photo urls. If not specified, the abdomen photos will not be included.
    include_in_name : list of str, default=[]
        A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
        Accepted Included Names include:
            * `url_type` -- Type of photo (e.g. Watersource, Larvae, Abdomen)
            * `watersource` -- Watersource for the observed mosquito habitat
            * `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
            * `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
            * `date_str` -- Measured date formatted as a String
            * `mhm_id` -- Unique ID for a MHM observation
            * `classification` -- Mosquito classification (or `"None"` if no classification available)
    additional_name_stem : str, default=""
        Additional custom information the user can add to the name.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.

    Returns
    -------
    set of tuple
        Contains the (url, directory, filename, resolution) for each desired mosquito habitat mapper photo
    """
    download_targets = get_mhm_download_targets(
        mhm_df=mhm_df,
        directory=directory,
        latitude_col=latitude_col,
        longitude_col=longitude_col,
        watersource_col=watersource_col,
        date_col=date_col,
        id_col=id_col,
        genus_col=genus_col,
        species_col=species_col,
        larvae_photo=larvae_photo,
        watersource_photo=watersource_photo,
        abdomen_photo=abdomen_photo,
        include_in_name=include_in_name,
        additional_name_stem=additional_name_stem,
        resolution=resolution,
    )
    download_all_photos(download_targets)
    return download_targets
392
393
def get_lc_download_targets(
    lc_df,
    directory,
    latitude_col="lc_Latitude",
    longitude_col="lc_Longitude",
    date_col="lc_measuredDate",
    id_col="lc_LandCoverId",
    up_photo="lc_UpwardPhotoUrl",
    down_photo="lc_DownwardPhotoUrl",
    north_photo="lc_NorthPhotoUrl",
    south_photo="lc_SouthPhotoUrl",
    east_photo="lc_EastPhotoUrl",
    west_photo="lc_WestPhotoUrl",
    include_in_name=[],
    additional_name_stem="",
    resolution=None,
):
    """
    Generates landcover targets to download

    Each photo column is expected to hold one url per row. Urls that are
    missing, do not contain "https", or whose photo ID is missing or not a
    non-negative integer are skipped; the number of skipped urls per reason is
    reported in a single warning.

    Parameters
    ----------
    lc_df : pd.DataFrame
        Cleaned and Flagged Landcover Data
    directory : str
        The directory to save the photos
    latitude_col : str, default="lc_Latitude"
        The column name of the column that contains the Latitude
    longitude_col : str, default="lc_Longitude"
        The column name of the column that contains the Longitude
    date_col : str, default="lc_measuredDate"
        The column name of the column that contains the measured date
    id_col : str, default="lc_LandCoverId"
        The column name of the column that contains the landcover id
    up_photo : str, default = "lc_UpwardPhotoUrl"
        The column name of the column that contains the upward photo urls. If not specified, these photos will not be included.
    down_photo : str, default = "lc_DownwardPhotoUrl"
        The column name of the column that contains the downward photo urls. If not specified, these photos will not be included.
    north_photo : str, default = "lc_NorthPhotoUrl"
        The column name of the column that contains the north photo urls. If not specified, these photos will not be included.
    south_photo : str, default = "lc_SouthPhotoUrl"
        The column name of the column that contains the south photo urls. If not specified, these photos will not be included.
    east_photo : str, default = "lc_EastPhotoUrl"
        The column name of the column that contains the east photo urls. If not specified, these photos will not be included.
    west_photo : str, default = "lc_WestPhotoUrl"
        The column name of the column that contains the west photo urls. If not specified, these photos will not be included.
    include_in_name : list of str, default=[]
        A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
        Accepted Included Names include:
            * `direction` -- Direction where the photo was taken (e.g. North, South, East, West, Up, Down)
            * `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
            * `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
            * `date_str` -- Measured date formatted as YYYY-MM-DD
            * `lc_id` -- Unique ID for a LC observation
    additional_name_stem : str, default=""
        Additional custom information the user can add to the name.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.

    Returns
    -------
    set of tuple
        Contains the (url, directory, filename, resolution) for each desired land cover photo
    """
    targets = set()
    num_invalid_photos = {
        "invalid_URL": 0,
        "rejected": 0,
        "pending": 0,
        "bad_photo_id": 0,
    }

    def has_valid_photo_id(photo_id):
        # A non-numeric ID is counted as a bad photo ID instead of aborting
        # the whole run with a ValueError.
        if pd.isna(photo_id):
            return False
        try:
            return int(photo_id) >= 0
        except ValueError:
            return False

    def get_photo_args(url, latitude, longitude, direction, date, lc_id):
        if pd.isna(url):
            num_invalid_photos["invalid_URL"] += 1
        elif "https" in url:
            date_str = pd.to_datetime(str(date)).strftime("%Y-%m-%d")
            photo_id = get_globe_photo_id(url)

            name_fields = {
                "direction": direction,
                "latitude": round(latitude, 5),
                "longitude": round(longitude, 5),
                "date_str": date_str,
                "lc_id": lc_id,
            }

            if has_valid_photo_id(photo_id):
                name = _build_photo_name(
                    "lc_",
                    photo_id,
                    name_fields,
                    include_in_name,
                    additional_name_stem,
                )
                targets.add((url, directory, name, resolution))
            else:
                num_invalid_photos["bad_photo_id"] += 1
        elif "rejected" in url:
            num_invalid_photos["rejected"] += 1
        elif "pending" in url:
            num_invalid_photos["pending"] += 1
        else:
            num_invalid_photos["invalid_URL"] += 1

    photo_locations = {
        "up_photo": up_photo,
        "down_photo": down_photo,
        "north_photo": north_photo,
        "south_photo": south_photo,
        "east_photo": east_photo,
        "west_photo": west_photo,
    }
    for param_name, column_name in photo_locations.items():
        if not column_name:
            continue
        direction = _format_param_name(param_name)
        # A plain row loop is used instead of np.vectorize: without `otypes`,
        # vectorize evaluates the function an extra time on the first row
        # (double-counting its invalid photos) and raises on an empty frame.
        rows = zip(
            lc_df[column_name].tolist(),
            lc_df[latitude_col].tolist(),
            lc_df[longitude_col].tolist(),
            lc_df[date_col].tolist(),
            lc_df[id_col].tolist(),
        )
        for url, latitude, longitude, date, lc_id in rows:
            get_photo_args(url, latitude, longitude, direction, date, lc_id)
    _warn_num_invalid_photos(num_invalid_photos)
    return targets
513
514
def download_lc_photos(
    lc_df,
    directory,
    latitude_col="lc_Latitude",
    longitude_col="lc_Longitude",
    date_col="lc_measuredDate",
    id_col="lc_LandCoverId",
    up_photo="lc_UpwardPhotoUrl",
    down_photo="lc_DownwardPhotoUrl",
    north_photo="lc_NorthPhotoUrl",
    south_photo="lc_SouthPhotoUrl",
    east_photo="lc_EastPhotoUrl",
    west_photo="lc_WestPhotoUrl",
    include_in_name=[],
    additional_name_stem="",
    resolution=None,
):
    """
    Downloads Landcover photos for landcover data.

    Builds the download targets with `get_lc_download_targets` using the exact
    same arguments and then downloads every target.

    Parameters
    ----------
    lc_df : pd.DataFrame
        Cleaned and Flagged Landcover Data
    directory : str
        The directory to save the photos
    latitude_col : str, default="lc_Latitude"
        The column name of the column that contains the Latitude
    longitude_col : str, default="lc_Longitude"
        The column name of the column that contains the Longitude
    date_col : str, default="lc_measuredDate"
        The column name of the column that contains the measured date
    id_col : str, default="lc_LandCoverId"
        The column name of the column that contains the landcover id
    up_photo : str, default = "lc_UpwardPhotoUrl"
        The column name of the column that contains the upward photo urls. If not specified, these photos will not be included.
    down_photo : str, default = "lc_DownwardPhotoUrl"
        The column name of the column that contains the downward photo urls. If not specified, these photos will not be included.
    north_photo : str, default = "lc_NorthPhotoUrl"
        The column name of the column that contains the north photo urls. If not specified, these photos will not be included.
    south_photo : str, default = "lc_SouthPhotoUrl"
        The column name of the column that contains the south photo urls. If not specified, these photos will not be included.
    east_photo : str, default = "lc_EastPhotoUrl"
        The column name of the column that contains the east photo urls. If not specified, these photos will not be included.
    west_photo : str, default = "lc_WestPhotoUrl"
        The column name of the column that contains the west photo urls. If not specified, these photos will not be included.
    include_in_name : list of str, default=[]
        A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
        Accepted Included Names include:
            * `direction` -- Direction where the photo was taken (e.g. North, South, East, West, Up, Down)
            * `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
            * `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
            * `date_str` -- Measured date formatted as a String
            * `lc_id` -- Unique ID for a LC observation
    additional_name_stem : str, default=""
        Additional custom information the user can add to the name.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.

    Returns
    -------
    set of tuple
        Contains the (url, directory, filename, resolution) for each desired land cover photo
    """
    download_targets = get_lc_download_targets(
        lc_df=lc_df,
        directory=directory,
        latitude_col=latitude_col,
        longitude_col=longitude_col,
        date_col=date_col,
        id_col=id_col,
        up_photo=up_photo,
        down_photo=down_photo,
        north_photo=north_photo,
        south_photo=south_photo,
        east_photo=east_photo,
        west_photo=west_photo,
        include_in_name=include_in_name,
        additional_name_stem=additional_name_stem,
        resolution=resolution,
    )
    download_all_photos(download_targets)
    return download_targets
def get_globe_photo_id(url: str)
def get_globe_photo_id(url: str):
    """
    Gets the GLOBE Photo ID from a url

    The ID is the single path segment that immediately follows a
    ``YYYY/MM/DD/`` date component in the url.

    Parameters
    ----------
    url : str
      A url to a GLOBE Observer Image

    Returns
    -------
    str or None
      The photo ID, or None when the url is missing or has no path segment
      following a date component.
    """
    if pd.isna(url):
        return None
    # `[^/]+` keeps the match inside one path segment. A greedy `.*` runs to
    # the LAST "/" in the url and would swallow extra segments or a query
    # string that happens to contain a slash.
    match_obj = re.search(r"(?<=\d{4}/\d{2}/\d{2}/)[^/]+(?=/)", url)
    return match_obj.group(0) if match_obj else None

Gets the GLOBE Photo ID from a url

Parameters
  • url (str): A url to a GLOBE Observer Image
def remove_bad_characters(filename: str)
def remove_bad_characters(filename: str):
    """
    Strips characters that are not allowed in a filename.

    Slashes are stripped as well, so this must be given a bare filename and
    not a path whose directory separators need to be kept.

    Parameters
    ----------
    filename : str
      A possible filename.

    Returns
    -------
    str
        The filename with every disallowed character removed, or None if the
        filename is missing.
    """
    if not pd.isna(filename):
        forbidden = re.compile(r"[<>:?\"/\\|*]")
        return forbidden.sub("", filename)
    return None

Removes erroneous characters from filenames. This includes the / character as this is assuming that the filename is being passed, not a path that may include that symbol as part of a directory.

Parameters
  • filename (str): A possible filename.
Returns
  • str: The filename without any erroneous characters
def download_photo(url: str, directory: str, filename: str, resolution=None)
def download_photo(url: str, directory: str, filename: str, resolution=None):
    """
    Downloads a photo to a directory.

    If the url, directory, or filename is missing, a warning is issued and
    nothing is downloaded. The directory (including any missing parent
    directories) is created when it does not exist yet.

    Parameters
    ----------
    url : str
        The URL to the photo
    directory : str
        The directory that the photo should be saved in
    filename : str
        The name of the photo. Characters that are not allowed in filenames are removed before saving.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.
    """
    if any(pd.isna(x) for x in [url, directory, filename]):
        msg = f"Either url ({url}), directory ({directory}), or filename ({filename}) was None."
        warnings.warn(msg)
        return

    filename = remove_bad_characters(filename)
    out_path = os.path.join(directory, filename)
    # makedirs also creates missing parents and tolerates a directory that
    # appears between the check and the creation.
    os.makedirs(directory, exist_ok=True)

    # Only scalars can be "missing"; calling pd.isna on a list would return an
    # element-wise array whose truth value is ambiguous.
    keep_original = pd.api.types.is_scalar(resolution) and pd.isna(resolution)
    if not keep_original:
        # The resizing helper performs its own request, so the photo is not
        # fetched here as well.
        get_img_at_resolution(url, out_path, resolution)
        return

    downloaded_obj = requests.get(url, allow_redirects=True)
    if not downloaded_obj.ok:
        # Do not save an HTTP error page under an image filename.
        warnings.warn(f"{url} failed: HTTP status {downloaded_obj.status_code}")
        return
    with open(out_path, "wb") as file:
        file.write(downloaded_obj.content)

Downloads a photo to a directory.

Parameters
  • url (str): The URL to the photo
  • directory (str): The directory that the photo should be saved in
  • filename (str): The name of the photo
  • resolution (tuple of int, default = None): The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.
def get_img_at_resolution(url, path, resolution)
 81def get_img_at_resolution(url, path, resolution):
 82    """
 83    Downloads an image from a url at a specified resolution
 84
 85    Parameters
 86    ----------
 87    url : str
 88        An image URL
 89    path : str
 90        The filepath to save the image to
 91    resolution : tuple of int
 92        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image.
 93    """
 94
 95    def get_img():
 96        with Image.open(requests.get(url, stream=True).raw) as img:
 97            img.resize(resolution).save(path)
 98
 99    try:
100        get_img()
101    except Exception as e:  # Sometimes the image download fails and it has to be rerun
102        warnings.warn(f"{url} failed due to {repr(e)}, retrying...")
103        try:
104            get_img()
105            warnings.warn("retry successful")
106        except Exception as e:
107            warnings.warn(f"{url} failed: {repr(e)}")
108            return

Downloads an image from a url at a specified resolution

Parameters
  • url (str): An image URL
  • path (str): The filepath to save the image to
  • resolution (tuple of int): The image resolution in width x height. e.g. (1920, 1080) for a 1080p image.
def download_all_photos(targets)
def download_all_photos(targets):
    """
    Downloads all photos given a collection of targets which are tuples containing the url, directory, filename, and resolution.

    Targets that are not 4-element tuples are skipped with a warning.

    Parameters
    ----------
    targets : list of tuple of str
        Contains tuples that store the url, directory, filename, and resolution (will be None to get original photo resolution) of the desired photos to be downloaded in that order.
    """
    expected_num_params = 4
    # pd.isna on a list returns an element-wise array, and using that array in
    # an `if` raises ValueError. Only a scalar can mean "no targets given".
    if pd.api.types.is_scalar(targets) and pd.isna(targets):
        warnings.warn("Targets was none")
        return

    for target in targets:
        if isinstance(target, tuple) and len(target) == expected_num_params:
            download_photo(*target)
        else:
            warnings.warn(f"Target incorrectly formatted: {target}")

Downloads all photos given a list of targets, which are tuples containing the url, directory, filename, and resolution.

Parameters
  • targets (list of tuple of str): Contains tuples that store the url, directory, filename, and resolution (will be None to get original photo resolution) of the desired photos to be downloaded in that order.
def get_mhm_download_targets( mhm_df, directory, latitude_col='mhm_Latitude', longitude_col='mhm_Longitude', watersource_col='mhm_WaterSource', date_col='mhm_measuredDate', id_col='mhm_MosquitoHabitatMapperId', genus_col='mhm_Genus', species_col='mhm_Species', larvae_photo='mhm_LarvaFullBodyPhotoUrls', watersource_photo='mhm_WaterSourcePhotoUrls', abdomen_photo='mhm_AbdomenCloseupPhotoUrls', include_in_name=[], additional_name_stem='', resolution=None)
def get_mhm_download_targets(
    mhm_df,
    directory,
    latitude_col="mhm_Latitude",
    longitude_col="mhm_Longitude",
    watersource_col="mhm_WaterSource",
    date_col="mhm_measuredDate",
    id_col="mhm_MosquitoHabitatMapperId",
    genus_col="mhm_Genus",
    species_col="mhm_Species",
    larvae_photo="mhm_LarvaFullBodyPhotoUrls",
    watersource_photo="mhm_WaterSourcePhotoUrls",
    abdomen_photo="mhm_AbdomenCloseupPhotoUrls",
    include_in_name=[],
    additional_name_stem="",
    resolution=None,
):
    """
    Generates mosquito habitat mapper targets to download

    Parameters
    ----------
    mhm_df : pd.DataFrame
        Mosquito Habitat Mapper Data
    directory : str
        The directory to save the photos
    latitude_col : str, default="mhm_Latitude"
        The column name of the column that contains the Latitude
    longitude_col : str, default="mhm_Longitude"
        The column name of the column that contains the Longitude
    watersource_col : str, default = "mhm_WaterSource"
        The column name of the column that contains the watersource
    date_col : str, default = "mhm_measuredDate"
        The column name of the column that contains the measured date
    id_col : str, default = "mhm_MosquitoHabitatMapperId"
        The column name of the column that contains the mosquito habitat mapper id
    genus_col : str, default = "mhm_Genus"
        The column name of the column that contains the genus
    species_col : str, default = "mhm_Species"
        The column name of the column that contains the species. If falsy, an empty string is used as the species for every row.
    larvae_photo : str, default = "mhm_LarvaFullBodyPhotoUrls"
        The column name of the column that contains the larvae photo urls. If not specified, the larvae photos will not be included.
    watersource_photo : str, default = "mhm_WaterSourcePhotoUrls"
        The column name of the column that contains the watersource photo urls. If not specified, the watersource photos will not be included.
    abdomen_photo : str, default = "mhm_AbdomenCloseupPhotoUrls"
        The column name of the column that contains the abdomen photo urls. If not specified, the abdomen photos will not be included.
    include_in_name : list of str, default=[]
        A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
        Accepted Included Names include:
            * `url_type` -- Type of photo (e.g. Watersource, Larvae, Abdomen)
            * `watersource` -- Watersource for the observed mosquito habitat
            * `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
            * `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
            * `date_str` -- Date Range expressed as a String
            * `mhm_id` -- Unique ID for a MHM observation
            * `classification` -- Mosquito classification (or `"None"` if no classification available)
    additional_name_stem : str, default=""
        Additional custom information the user can add to the name.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.

    Returns
    -------
    set of tuple
        Contains the (url, directory, filename, resolution) for each desired mosquito habitat mapper photo
    """
    targets = set()
    num_invalid_photos = {
        "invalid_URL": 0,
        "rejected": 0,
        "pending": 0,
        "bad_photo_id": 0,
    }

    def is_valid_photo_id(photo_id):
        # The id is whatever text the url regex captured, so it can be
        # missing or non-numeric; both cases count as a bad photo id rather
        # than aborting the whole run with a ValueError.
        if pd.isna(photo_id):
            return False
        try:
            return int(photo_id) >= 0
        except (TypeError, ValueError):
            return False

    def get_photo_args(
        url_entry,
        url_type,
        latitude,
        longitude,
        watersource,
        date,
        mhm_id,
        genus,
        species,
    ):
        if pd.isna(url_entry):
            return

        # A single entry may hold several urls separated by ";"
        urls = url_entry.split(";")
        date_str = pd.to_datetime(str(date)).strftime("%Y-%m-%d")

        for url in urls:
            if "https" in url:
                photo_id = get_globe_photo_id(url)

                name_fields = {
                    "url_type": url_type,
                    "watersource": watersource,
                    "latitude": round(latitude, 5),
                    "longitude": round(longitude, 5),
                    "date_str": date_str,
                    "mhm_id": mhm_id,
                    "classification": _get_mosquito_classification(genus, species),
                }

                if is_valid_photo_id(photo_id):
                    protocol = "mhm_"
                    name = _build_photo_name(
                        protocol,
                        photo_id,
                        name_fields,
                        include_in_name,
                        additional_name_stem,
                    )
                    targets.add((url, directory, name, resolution))
                else:
                    num_invalid_photos["bad_photo_id"] += 1
            elif "rejected" in url:
                num_invalid_photos["rejected"] += 1
            elif "pending" in url:
                num_invalid_photos["pending"] += 1
            else:
                num_invalid_photos["invalid_URL"] += 1

    # Explicit mapping (in parameter order) instead of filtering locals() by
    # name, so unrelated local variables can never be picked up as columns.
    photo_locations = {
        "larvae_photo": larvae_photo,
        "watersource_photo": watersource_photo,
        "abdomen_photo": abdomen_photo,
    }
    for param_name, column_name in photo_locations.items():
        if not column_name:
            continue

        url_type = _format_param_name(param_name)
        num_rows = len(mhm_df)
        species_values = (
            mhm_df[species_col].tolist() if species_col else [""] * num_rows
        )
        # A plain loop replaces np.vectorize: without `otypes`, vectorize
        # makes an extra call on the first row (double-counting its invalid
        # photos) and raises on an empty DataFrame.
        rows = zip(
            mhm_df[column_name].tolist(),
            mhm_df[latitude_col].tolist(),
            mhm_df[longitude_col].tolist(),
            mhm_df[watersource_col].tolist(),
            mhm_df[date_col].tolist(),
            mhm_df[id_col].tolist(),
            mhm_df[genus_col].tolist(),
            species_values,
        )
        for (
            url_entry,
            latitude,
            longitude,
            watersource,
            date,
            mhm_id,
            genus,
            species,
        ) in rows:
            get_photo_args(
                url_entry,
                url_type,
                latitude,
                longitude,
                watersource,
                date,
                mhm_id,
                genus,
                species,
            )
    _warn_num_invalid_photos(num_invalid_photos)
    return targets

Generates mosquito habitat mapper targets to download

Parameters
  • mhm_df (pd.DataFrame): Mosquito Habitat Mapper Data
  • directory (str): The directory to save the photos
  • latitude_col (str, default="mhm_Latitude"): The column name of the column that contains the Latitude
  • longitude_col (str, default="mhm_Longitude"): The column name of the column that contains the Longitude
  • watersource_col (str, default = "mhm_WaterSource"): The column name of the column that contains the watersource
  • date_col (str, default = "mhm_measuredDate"): The column name of the column that contains the measured date
  • id_col (str, default = "mhm_MosquitoHabitatMapperId"): The column name of the column that contains the mosquito habitat mapper id
  • genus_col (str, default = "mhm_Genus"): The column name of the column that contains the genus
  • species_col (str, default = "mhm_Species"): The column name of the column that contains the species
  • larvae_photo (str, default = "mhm_LarvaFullBodyPhotoUrls"): The column name of the column that contains the larvae photo urls. If not specified, the larvae photos will not be included.
  • watersource_photo (str, default = "mhm_WaterSourcePhotoUrls"): The column name of the column that contains the watersource photo urls. If not specified, the watersource photos will not be included.
  • abdomen_photo (str, default = "mhm_AbdomenCloseupPhotoUrls"): The column name of the column that contains the abdomen photo urls. If not specified, the abdomen photos will not be included.
  • include_in_name (list of str, default=[]): A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name Accepted Included Names include:
    • url_type -- Type of photo (e.g. Watersource, Larvae, Abdomen)
    • watersource -- Watersource for the observed mosquito habitat
    • latitude -- GPS Latitude Coordinate (rounded to 5 decimal places)
    • longitude -- GPS Longitude Coordinate (rounded to 5 decimal places)
    • date_str -- Date Range expressed as a String
    • mhm_id -- Unique ID for a MHM observation
    • classification -- Mosquito classification (or "None" if no classification available)
  • additional_name_stem (str, default=""): Additional custom information the user can add to the name.
  • resolution (tuple of int, default = None): The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.
Returns
  • set of tuple: Contains the (url, directory, filename, resolution) for each desired mosquito habitat mapper photo
def download_mhm_photos( mhm_df, directory, latitude_col='mhm_Latitude', longitude_col='mhm_Longitude', watersource_col='mhm_WaterSource', date_col='mhm_measuredDate', id_col='mhm_MosquitoHabitatMapperId', genus_col='mhm_Genus', species_col='mhm_Species', larvae_photo='mhm_LarvaFullBodyPhotoUrls', watersource_photo='mhm_WaterSourcePhotoUrls', abdomen_photo='mhm_AbdomenCloseupPhotoUrls', include_in_name=[], additional_name_stem='', resolution=None)
def download_mhm_photos(
    mhm_df,
    directory,
    latitude_col="mhm_Latitude",
    longitude_col="mhm_Longitude",
    watersource_col="mhm_WaterSource",
    date_col="mhm_measuredDate",
    id_col="mhm_MosquitoHabitatMapperId",
    genus_col="mhm_Genus",
    species_col="mhm_Species",
    larvae_photo="mhm_LarvaFullBodyPhotoUrls",
    watersource_photo="mhm_WaterSourcePhotoUrls",
    abdomen_photo="mhm_AbdomenCloseupPhotoUrls",
    include_in_name=[],
    additional_name_stem="",
    resolution=None,
):
    """
    Builds the mosquito habitat mapper download targets and then downloads them.

    Every argument is forwarded unchanged to `get_mhm_download_targets`; the resulting targets are passed to `download_all_photos`.

    Parameters
    ----------
    mhm_df : pd.DataFrame
        Mosquito Habitat Mapper Data
    directory : str
        The directory to save the photos
    latitude_col : str, default="mhm_Latitude"
        Name of the column holding the Latitude
    longitude_col : str, default="mhm_Longitude"
        Name of the column holding the Longitude
    watersource_col : str, default = "mhm_WaterSource"
        Name of the column holding the watersource
    date_col : str, default = "mhm_measuredDate"
        Name of the column holding the measured date
    id_col : str, default = "mhm_MosquitoHabitatMapperId"
        Name of the column holding the mosquito habitat mapper id
    genus_col : str, default = "mhm_Genus"
        Name of the column holding the genus
    species_col : str, default = "mhm_Species"
        Name of the column holding the species
    larvae_photo : str, default = "mhm_LarvaFullBodyPhotoUrls"
        Name of the column holding the larvae photo urls. If not specified, the larvae photos will not be included.
    watersource_photo : str, default = "mhm_WaterSourcePhotoUrls"
        Name of the column holding the watersource photo urls. If not specified, the watersource photos will not be included.
    abdomen_photo : str, default = "mhm_AbdomenCloseupPhotoUrls"
        Name of the column holding the abdomen photo urls. If not specified, the abdomen photos will not be included.
    include_in_name : list of str, default=[]
        A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
        Accepted Included Names include:
            * `url_type` -- Type of photo (e.g. Watersource, Larvae, Abdomen)
            * `watersource` -- Watersource for the observed mosquito habitat
            * `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
            * `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
            * `date_str` -- Date Range expressed as a String
            * `mhm_id` -- Unique ID for a MHM observation
            * `classification` -- Mosquito classification (or `"None"` if no classification available)
    additional_name_stem : str, default=""
        Additional custom information the user can add to the name.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.

    Returns
    -------
    set of tuple
        The targets returned by `get_mhm_download_targets` for the desired mosquito habitat mapper photos
    """
    # Forward each argument by name rather than unpacking locals()
    targets = get_mhm_download_targets(
        mhm_df=mhm_df,
        directory=directory,
        latitude_col=latitude_col,
        longitude_col=longitude_col,
        watersource_col=watersource_col,
        date_col=date_col,
        id_col=id_col,
        genus_col=genus_col,
        species_col=species_col,
        larvae_photo=larvae_photo,
        watersource_photo=watersource_photo,
        abdomen_photo=abdomen_photo,
        include_in_name=include_in_name,
        additional_name_stem=additional_name_stem,
        resolution=resolution,
    )
    download_all_photos(targets)
    return targets

Downloads mosquito habitat mapper photos

Parameters
  • mhm_df (pd.DataFrame): Mosquito Habitat Mapper Data
  • directory (str): The directory to save the photos
  • latitude_col (str, default="mhm_Latitude"): The column name of the column that contains the Latitude
  • longitude_col (str, default="mhm_Longitude"): The column name of the column that contains the Longitude
  • watersource_col (str, default = "mhm_WaterSource"): The column name of the column that contains the watersource
  • date_col (str, default = "mhm_measuredDate"): The column name of the column that contains the measured date
  • id_col (str, default = "mhm_MosquitoHabitatMapperId"): The column name of the column that contains the mosquito habitat mapper id
  • genus_col (str, default = "mhm_Genus"): The column name of the column that contains the genus
  • species_col (str, default = "mhm_Species"): The column name of the column that contains the species
  • larvae_photo (str, default = "mhm_LarvaFullBodyPhotoUrls"): The column name of the column that contains the larvae photo urls. If not specified, the larvae photos will not be included.
  • watersource_photo (str, default = "mhm_WaterSourcePhotoUrls"): The column name of the column that contains the watersource photo urls. If not specified, the watersource photos will not be included.
  • abdomen_photo (str, default = "mhm_AbdomenCloseupPhotoUrls"): The column name of the column that contains the abdomen photo urls. If not specified, the abdomen photos will not be included.
  • include_in_name (list of str, default=[]): A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name. Accepted Included Names include:
    • url_type -- Type of photo (e.g. Watersource, Larvae, Abdomen)
    • watersource -- Watersource for the observed mosquito habitat
    • latitude -- GPS Latitude Coordinate (rounded to 5 decimal places)
    • longitude -- GPS Longitude Coordinate (rounded to 5 decimal places)
    • date_str -- Date Range expressed as a String
    • mhm_id -- Unique ID for a MHM observation
    • classification -- Mosquito classification (or "None" if no classification available)
  • additional_name_stem (str, default=""): Additional custom information the user can add to the name.
  • resolution (tuple of int, default = None): The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.
Returns
  • set of tuple: Contains the (url, directory, filename, resolution) for each desired mosquito habitat mapper photo
def get_lc_download_targets( lc_df, directory, latitude_col='lc_Latitude', longitude_col='lc_Longitude', date_col='lc_measuredDate', id_col='lc_LandCoverId', up_photo='lc_UpwardPhotoUrl', down_photo='lc_DownwardPhotoUrl', north_photo='lc_NorthPhotoUrl', south_photo='lc_SouthPhotoUrl', east_photo='lc_EastPhotoUrl', west_photo='lc_WestPhotoUrl', include_in_name=[], additional_name_stem='', resolution=None)
def get_lc_download_targets(
    lc_df,
    directory,
    latitude_col="lc_Latitude",
    longitude_col="lc_Longitude",
    date_col="lc_measuredDate",
    id_col="lc_LandCoverId",
    up_photo="lc_UpwardPhotoUrl",
    down_photo="lc_DownwardPhotoUrl",
    north_photo="lc_NorthPhotoUrl",
    south_photo="lc_SouthPhotoUrl",
    east_photo="lc_EastPhotoUrl",
    west_photo="lc_WestPhotoUrl",
    include_in_name=[],
    additional_name_stem="",
    resolution=None,
):
    """
    Generates landcover targets to download

    Parameters
    ----------
    lc_df : pd.DataFrame
        Cleaned and Flagged Landcover Data
    directory : str
        The directory to save the photos
    latitude_col : str, default="lc_Latitude"
        The column name of the column that contains the Latitude
    longitude_col : str, default="lc_Longitude"
        The column name of the column that contains the Longitude
    date_col : str, default="lc_measuredDate"
        The column name of the column that contains the measured date
    id_col : str, default="lc_LandCoverId"
        The column name of the column that contains the landcover id
    up_photo : str, default = "lc_UpwardPhotoUrl"
        The column name of the column that contains the upward photo urls. If not specified, these photos will not be included.
    down_photo : str, default = "lc_DownwardPhotoUrl"
        The column name of the column that contains the downward photo urls. If not specified, these photos will not be included.
    north_photo : str, default = "lc_NorthPhotoUrl"
        The column name of the column that contains the north photo urls. If not specified, these photos will not be included.
    south_photo : str, default = "lc_SouthPhotoUrl"
        The column name of the column that contains the south photo urls. If not specified, these photos will not be included.
    east_photo : str, default = "lc_EastPhotoUrl"
        The column name of the column that contains the east photo urls. If not specified, these photos will not be included.
    west_photo : str, default = "lc_WestPhotoUrl"
        The column name of the column that contains the west photo urls. If not specified, these photos will not be included.
    include_in_name : list of str, default=[]
        A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
        Accepted Included Names include:
            * `direction` -- Direction where the photo was taken (e.g. North, South, East, West, Up, Down)
            * `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
            * `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
            * `date_str` -- Date Range expressed as a String
            * `lc_id` -- Unique ID for a LC observation
    additional_name_stem : str, default=""
        Additional custom information the user can add to the name.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.

    Returns
    -------
    set of tuple
        Contains the (url, directory, filename, resolution) for each desired land cover photo
    """
    targets = set()
    num_invalid_photos = {
        "invalid_URL": 0,
        "rejected": 0,
        "pending": 0,
        "bad_photo_id": 0,
    }

    def is_valid_photo_id(photo_id):
        # The id is whatever text the url regex captured, so it can be
        # missing or non-numeric; both cases count as a bad photo id rather
        # than aborting the whole run with a ValueError.
        if pd.isna(photo_id):
            return False
        try:
            return int(photo_id) >= 0
        except (TypeError, ValueError):
            return False

    def get_photo_args(url, latitude, longitude, direction, date, lc_id):
        # A missing url is tallied as an invalid URL
        if pd.isna(url):
            num_invalid_photos["invalid_URL"] += 1
            return

        if "https" in url:
            date_str = pd.to_datetime(str(date)).strftime("%Y-%m-%d")
            photo_id = get_globe_photo_id(url)

            name_fields = {
                "direction": direction,
                "latitude": round(latitude, 5),
                "longitude": round(longitude, 5),
                "date_str": date_str,
                "lc_id": lc_id,
            }

            if is_valid_photo_id(photo_id):
                protocol = "lc_"
                name = _build_photo_name(
                    protocol,
                    photo_id,
                    name_fields,
                    include_in_name,
                    additional_name_stem,
                )
                targets.add((url, directory, name, resolution))
            else:
                num_invalid_photos["bad_photo_id"] += 1
        elif "rejected" in url:
            num_invalid_photos["rejected"] += 1
        elif "pending" in url:
            num_invalid_photos["pending"] += 1
        else:
            num_invalid_photos["invalid_URL"] += 1

    # Explicit mapping (in parameter order) instead of filtering locals() by
    # name, so unrelated local variables can never be picked up as columns.
    photo_locations = {
        "up_photo": up_photo,
        "down_photo": down_photo,
        "north_photo": north_photo,
        "south_photo": south_photo,
        "east_photo": east_photo,
        "west_photo": west_photo,
    }
    for param_name, column_name in photo_locations.items():
        if not column_name:
            continue

        direction = _format_param_name(param_name)
        # A plain loop replaces np.vectorize: without `otypes`, vectorize
        # makes an extra call on the first row (double-counting its invalid
        # photos) and raises on an empty DataFrame.
        rows = zip(
            lc_df[column_name].tolist(),
            lc_df[latitude_col].tolist(),
            lc_df[longitude_col].tolist(),
            lc_df[date_col].tolist(),
            lc_df[id_col].tolist(),
        )
        for url, latitude, longitude, date, lc_id in rows:
            get_photo_args(url, latitude, longitude, direction, date, lc_id)
    _warn_num_invalid_photos(num_invalid_photos)
    return targets

Generates landcover targets to download

Parameters
  • lc_df (pd.DataFrame): Cleaned and Flagged Landcover Data
  • directory (str): The directory to save the photos
  • latitude_col (str, default="lc_Latitude"): The column name of the column that contains the Latitude
  • longitude_col (str, default="lc_Longitude"): The column name of the column that contains the Longitude
  • date_col (str, default="lc_measuredDate"): The column name of the column that contains the measured date
  • id_col (str, default="lc_LandCoverId"): The column name of the column that contains the landcover id
  • up_photo (str, default = "lc_UpwardPhotoUrl"): The column name of the column that contains the upward photo urls. If not specified, these photos will not be included.
  • down_photo (str, default = "lc_DownwardPhotoUrl"): The column name of the column that contains the downward photo urls. If not specified, these photos will not be included.
  • north_photo (str, default = "lc_NorthPhotoUrl"): The column name of the column that contains the north photo urls. If not specified, these photos will not be included.
  • south_photo (str, default = "lc_SouthPhotoUrl"): The column name of the column that contains the south photo urls. If not specified, these photos will not be included.
  • east_photo (str, default = "lc_EastPhotoUrl"): The column name of the column that contains the east photo urls. If not specified, these photos will not be included.
  • west_photo (str, default = "lc_WestPhotoUrl"): The column name of the column that contains the west photo urls. If not specified, these photos will not be included.
  • include_in_name (list of str, default=[]): A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name Accepted Included Names include:
    • direction -- Direction where the photo was taken (e.g. North, South, East, West, Up, Down)
    • latitude -- GPS Latitude Coordinate (rounded to 5 decimal places)
    • longitude -- GPS Longitude Coordinate (rounded to 5 decimal places)
    • date_str -- Date Range expressed as a String
    • lc_id -- Unique ID for a LC observation
  • additional_name_stem (str, default=""): Additional custom information the user can add to the name.
  • resolution (tuple of int, default = None): The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.
Returns
  • set of tuple: Contains the (url, directory, filename, resolution) for each desired land cover photo
def download_lc_photos( lc_df, directory, latitude_col='lc_Latitude', longitude_col='lc_Longitude', date_col='lc_measuredDate', id_col='lc_LandCoverId', up_photo='lc_UpwardPhotoUrl', down_photo='lc_DownwardPhotoUrl', north_photo='lc_NorthPhotoUrl', south_photo='lc_SouthPhotoUrl', east_photo='lc_EastPhotoUrl', west_photo='lc_WestPhotoUrl', include_in_name=[], additional_name_stem='', resolution=None)
def download_lc_photos(
    lc_df,
    directory,
    latitude_col="lc_Latitude",
    longitude_col="lc_Longitude",
    date_col="lc_measuredDate",
    id_col="lc_LandCoverId",
    up_photo="lc_UpwardPhotoUrl",
    down_photo="lc_DownwardPhotoUrl",
    north_photo="lc_NorthPhotoUrl",
    south_photo="lc_SouthPhotoUrl",
    east_photo="lc_EastPhotoUrl",
    west_photo="lc_WestPhotoUrl",
    include_in_name=[],
    additional_name_stem="",
    resolution=None,
):
    """
    Builds the landcover download targets and then downloads them.

    Every argument is forwarded unchanged to `get_lc_download_targets`; the resulting targets are passed to `download_all_photos`.

    Parameters
    ----------
    lc_df : pd.DataFrame
        Cleaned and Flagged Landcover Data
    directory : str
        The directory to save the photos
    latitude_col : str, default="lc_Latitude"
        Name of the column holding the Latitude
    longitude_col : str, default="lc_Longitude"
        Name of the column holding the Longitude
    date_col : str, default="lc_measuredDate"
        Name of the column holding the measured date
    id_col : str, default="lc_LandCoverId"
        Name of the column holding the landcover id
    up_photo : str, default = "lc_UpwardPhotoUrl"
        Name of the column holding the upward photo urls. If not specified, these photos will not be included.
    down_photo : str, default = "lc_DownwardPhotoUrl"
        Name of the column holding the downward photo urls. If not specified, these photos will not be included.
    north_photo : str, default = "lc_NorthPhotoUrl"
        Name of the column holding the north photo urls. If not specified, these photos will not be included.
    south_photo : str, default = "lc_SouthPhotoUrl"
        Name of the column holding the south photo urls. If not specified, these photos will not be included.
    east_photo : str, default = "lc_EastPhotoUrl"
        Name of the column holding the east photo urls. If not specified, these photos will not be included.
    west_photo : str, default = "lc_WestPhotoUrl"
        Name of the column holding the west photo urls. If not specified, these photos will not be included.
    include_in_name : list of str, default=[]
        A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
        Accepted Included Names include:
            * `direction` -- Direction where the photo was taken (e.g. North, South, East, West, Up, Down)
            * `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
            * `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
            * `date_str` -- Date Range expressed as a String
            * `lc_id` -- Unique ID for a LC observation
    additional_name_stem : str, default=""
        Additional custom information the user can add to the name.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.

    Returns
    -------
    set of tuple
        The targets returned by `get_lc_download_targets` for the desired land cover photos
    """
    # Forward each argument by name rather than unpacking locals()
    targets = get_lc_download_targets(
        lc_df=lc_df,
        directory=directory,
        latitude_col=latitude_col,
        longitude_col=longitude_col,
        date_col=date_col,
        id_col=id_col,
        up_photo=up_photo,
        down_photo=down_photo,
        north_photo=north_photo,
        south_photo=south_photo,
        east_photo=east_photo,
        west_photo=west_photo,
        include_in_name=include_in_name,
        additional_name_stem=additional_name_stem,
        resolution=resolution,
    )
    download_all_photos(targets)
    return targets

Downloads Landcover photos for landcover data.

Parameters
  • lc_df (pd.DataFrame): Cleaned and Flagged Landcover Data
  • directory (str): The directory to save the photos
  • latitude_col (str, default="lc_Latitude"): The column name of the column that contains the Latitude
  • longitude_col (str, default="lc_Longitude"): The column name of the column that contains the Longitude
  • date_col (str, default="lc_measuredDate"): The column name of the column that contains the measured date
  • id_col (str, default="lc_LandCoverId"): The column name of the column that contains the landcover id
  • up_photo (str, default = "lc_UpwardPhotoUrl"): The column name of the column that contains the upward photo urls. If not specified, these photos will not be included.
  • down_photo (str, default = "lc_DownwardPhotoUrl"): The column name of the column that contains the downward photo urls. If not specified, these photos will not be included.
  • north_photo (str, default = "lc_NorthPhotoUrl"): The column name of the column that contains the north photo urls. If not specified, these photos will not be included.
  • south_photo (str, default = "lc_SouthPhotoUrl"): The column name of the column that contains the south photo urls. If not specified, these photos will not be included.
  • east_photo (str, default = "lc_EastPhotoUrl"): The column name of the column that contains the east photo urls. If not specified, these photos will not be included.
  • west_photo (str, default = "lc_WestPhotoUrl"): The column name of the column that contains the west photo urls. If not specified, these photos will not be included.
  • include_in_name (list of str, default=[]): A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name Accepted Included Names include:
    • direction -- Direction where the photo was taken (e.g. North, South, East, West, Up, Down)
    • latitude -- GPS Latitude Coordinate (rounded to 5 decimal places)
    • longitude -- GPS Longitude Coordinate (rounded to 5 decimal places)
    • date_str -- Date Range expressed as a String
    • lc_id -- Unique ID for a LC observation
  • additional_name_stem (str, default=""): Additional custom information the user can add to the name.
  • resolution (tuple of int, default = None): The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.
Returns
  • set of tuple: Contains the (url, directory, filename, resolution) for each desired land cover photo