go_utils.photo_download
"""Helpers for naming and downloading GLOBE Observer photos."""
import os
import re
import warnings

import numpy as np  # kept: part of the module's existing import surface
import pandas as pd
import requests
from PIL import Image

# The photo ID is the single path segment that follows the YYYY/MM/DD part of
# the URL. Restricting it to one segment keeps extra path components out of it.
_PHOTO_ID_PATTERN = re.compile(r"(?<=\d{4}/\d{2}/\d{2}/)[^/]+(?=/)")
_BAD_FILENAME_CHARACTERS = re.compile(r"[<>:?\"/\\|*]")
_VALID_PROTOCOLS = ("lc_", "mhm_")


def _is_missing(value):
    """
    Returns True for None and NaN-like scalars.

    Sized objects (lists, tuples, sets, strings) are never treated as missing,
    which avoids the ambiguous array truth value that ``pd.isna`` produces for
    list inputs.
    """
    return value is None or (not hasattr(value, "__len__") and bool(pd.isna(value)))


def get_globe_photo_id(url: str):
    """
    Gets the GLOBE Photo ID from a url

    Parameters
    ----------
    url : str
        A url to a GLOBE Observer Image

    Returns
    -------
    str or None
        The path segment that directly follows the ``YYYY/MM/DD/`` part of the
        url, or None if the url is missing or does not contain such a segment.
    """
    if pd.isna(url):
        return None
    match_obj = _PHOTO_ID_PATTERN.search(url)
    return match_obj.group(0) if match_obj else None


def remove_bad_characters(filename: str):
    """
    Removes characters that are not allowed in filenames. The `/` character is
    removed as well because the argument is assumed to be a bare filename, not
    a path.

    Parameters
    ----------
    filename : str
        A possible filename.

    Returns
    -------
    str
        The filename without any erroneous characters (None if the filename is missing)
    """
    if pd.isna(filename):
        return None
    return _BAD_FILENAME_CHARACTERS.sub("", filename)


def download_photo(url: str, directory: str, filename: str, resolution=None):
    """
    Downloads a photo to a directory.

    A warning is issued (and nothing is downloaded) if the url, directory, or
    filename is missing, or if the server answers with an HTTP error status.

    Parameters
    ----------
    url : str
        The URL to the photo
    directory : str
        The directory that the photo should be saved in. It is created (including parent directories) if it does not exist.
    filename : str
        The name of the photo
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.
    """
    if any(pd.isna(x) for x in (url, directory, filename)):
        msg = f"Either url ({url}), directory ({directory}), or filename ({filename}) was None."
        warnings.warn(msg)
        return

    out_path = os.path.join(directory, remove_bad_characters(filename))

    if _is_missing(resolution):
        # Only fetch here when the original file is wanted; the resizing path
        # performs its own download, so fetching first would download twice.
        response = requests.get(url, allow_redirects=True)
        if not response.ok:
            # Do not store an error page under an image filename.
            warnings.warn(f"{url} failed: HTTP status {response.status_code}")
            return
        os.makedirs(directory, exist_ok=True)
        with open(out_path, "wb") as file:
            file.write(response.content)
    else:
        os.makedirs(directory, exist_ok=True)
        get_img_at_resolution(url, out_path, resolution)


def get_img_at_resolution(url, path, resolution):
    """
    Downloads an image from a url and saves it at a specified resolution

    The download is attempted at most twice. Failures are reported through
    warnings rather than raised.

    Parameters
    ----------
    url : str
        An image URL
    path : str
        The filepath to save the image to
    resolution : tuple of int
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image.
    """

    def get_img():
        # The context manager releases the streamed connection even when
        # decoding or saving fails.
        with requests.get(url, stream=True) as response:
            response.raise_for_status()
            # Let urllib3 undo any gzip/deflate content encoding before PIL reads the bytes.
            response.raw.decode_content = True
            with Image.open(response.raw) as img:
                img.resize(resolution).save(path)

    try:
        get_img()
    except Exception as e:  # Sometimes the image download fails and it has to be rerun
        warnings.warn(f"{url} failed due to {repr(e)}, retrying...")
        try:
            get_img()
            warnings.warn("retry successful")
        except Exception as e:
            warnings.warn(f"{url} failed: {repr(e)}")


def download_all_photos(targets):
    """
    Downloads all photos given a collection of targets, which are tuples containing the url, directory, filename, and resolution.

    Parameters
    ----------
    targets : list or set of tuple
        Contains tuples that store the url, directory, filename, and resolution (will be None to get original photo resolution) of the desired photos to be downloaded in that order. Entries that are not 4-tuples are skipped with a warning.
    """
    expected_num_params = 4
    if _is_missing(targets):
        warnings.warn("Targets was none")
        return

    for target in targets:
        if isinstance(target, tuple) and len(target) == expected_num_params:
            download_photo(*target)
        else:
            warnings.warn(f"Target incorrectly formatted: {target}")


def _format_param_name(name: str):
    """Turns a parameter name such as ``"larvae_photo"`` into a label such as ``"Larvae"``."""
    if pd.isna(name):
        return None
    words = " ".join(part.capitalize() for part in name.split("_"))
    return words.replace("Photo", "").strip()


def _build_photo_name(
    protocol, photo_id, name_fields, include_in_name=None, additional_name_stem=""
):
    """
    Constructs a photo filename from the protocol, optional fields, and photo ID.

    The name has the form ``<protocol><stem>_<field>_..._<photo_id>.png`` and
    is passed through ``remove_bad_characters``. Entries of ``include_in_name``
    that are not keys of ``name_fields`` are ignored. Returns None (with a
    warning) if the protocol is not ``"lc_"`` or ``"mhm_"``.
    """
    if not protocol or protocol not in _VALID_PROTOCOLS:
        warnings.warn("Invalid protocol")
        return None

    pieces = []
    if additional_name_stem:
        pieces.append(additional_name_stem)
    if include_in_name:
        pieces.extend(
            name_fields[field] for field in include_in_name if field in name_fields
        )

    # The protocol already ends with "_"; every other piece is followed by one.
    name = protocol + "".join(f"{piece}_" for piece in pieces) + f"{photo_id}.png"
    return remove_bad_characters(name)


def _get_mosquito_classification(genus, species):
    """Returns ``"<genus> <species>"``, just the genus, or ``"None"`` if the genus is missing."""
    if pd.isna(genus):
        return "None"
    if pd.isna(species):
        return genus
    return f"{genus} {species}"


def _warn_num_invalid_photos(num_invalid_photos: dict):
    """Issues one summary warning if any photo was skipped."""
    total = sum(num_invalid_photos.values())
    if total > 0:
        msg = f"Skipped {total} invalid photos: "
        msg += str(num_invalid_photos)
        warnings.warn(msg)


def _new_invalid_photo_counts():
    """Returns a fresh counter dict for the reasons a photo url can be skipped."""
    return {
        "invalid_URL": 0,
        "rejected": 0,
        "pending": 0,
        "bad_photo_id": 0,
    }


def _is_valid_photo_id(photo_id):
    """Returns True if the photo ID is present and parses as a non-negative integer."""
    if pd.isna(photo_id):
        return False
    try:
        return int(photo_id) >= 0
    except (TypeError, ValueError):
        # A non-numeric ID is counted as a bad ID instead of aborting the run.
        return False


def _classify_photo_url(url):
    """
    Classifies a single photo url.

    Returns
    -------
    tuple
        ``("valid", photo_id)`` for a downloadable url, otherwise
        ``(reason, None)`` where reason is a key of the invalid-photo counters.
    """
    if pd.isna(url):
        return "invalid_URL", None
    if "https" in url:
        photo_id = get_globe_photo_id(url)
        if _is_valid_photo_id(photo_id):
            return "valid", photo_id
        return "bad_photo_id", None
    if "rejected" in url:
        return "rejected", None
    if "pending" in url:
        return "pending", None
    return "invalid_URL", None


def get_mhm_download_targets(
    mhm_df,
    directory,
    latitude_col="mhm_Latitude",
    longitude_col="mhm_Longitude",
    watersource_col="mhm_WaterSource",
    date_col="mhm_measuredDate",
    id_col="mhm_MosquitoHabitatMapperId",
    genus_col="mhm_Genus",
    species_col="mhm_Species",
    larvae_photo="mhm_LarvaFullBodyPhotoUrls",
    watersource_photo="mhm_WaterSourcePhotoUrls",
    abdomen_photo="mhm_AbdomenCloseupPhotoUrls",
    include_in_name=None,
    additional_name_stem="",
    resolution=None,
):
    """
    Generates mosquito habitat mapper targets to download

    Each photo column may hold several urls separated by ``;``. Missing
    entries are ignored; urls that cannot be downloaded are counted and
    reported in a single warning.

    Parameters
    ----------
    mhm_df : pd.DataFrame
        Mosquito Habitat Mapper Data
    directory : str
        The directory to save the photos
    latitude_col : str, default="mhm_Latitude"
        The column name of the column that contains the Latitude
    longitude_col : str, default="mhm_Longitude"
        The column name of the column that contains the Longitude
    watersource_col : str, default = "mhm_WaterSource"
        The column name of the column that contains the watersource
    date_col : str, default = "mhm_measuredDate"
        The column name of the column that contains the measured date
    id_col : str, default = "mhm_MosquitoHabitatMapperId"
        The column name of the column that contains the mosquito habitat mapper id
    genus_col : str, default = "mhm_Genus"
        The column name of the column that contains the genus
    species_col : str, default = "mhm_Species"
        The column name of the column that contains the species. If not specified, the classification consists of the genus only.
    larvae_photo : str, default = "mhm_LarvaFullBodyPhotoUrls"
        The column name of the column that contains the larvae photo urls. If not specified, the larvae photos will not be included.
    watersource_photo : str, default = "mhm_WaterSourcePhotoUrls"
        The column name of the column that contains the watersource photo urls. If not specified, the watersource photos will not be included.
    abdomen_photo : str, default = "mhm_AbdomenCloseupPhotoUrls"
        The column name of the column that contains the abdomen photo urls. If not specified, the abdomen photos will not be included.
    include_in_name : list of str, default=None
        A list of field names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
        Accepted Included Names include:
        * `url_type` -- Type of photo (e.g. Watersource, Larvae, Abdomen)
        * `watersource` -- Watersource for the observed mosquito habitat
        * `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
        * `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
        * `date_str` -- Measured date formatted as YYYY-MM-DD
        * `mhm_id` -- Unique ID for a MHM observation
        * `classification` -- Mosquito classification (or `"None"` if no classification available)
    additional_name_stem : str, default=""
        Additional custom information the user can add to the name.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.

    Returns
    -------
    set of tuple
        Contains the (url, directory, filename, resolution) for each desired mosquito habitat mapper photo
    """
    targets = set()
    num_invalid_photos = _new_invalid_photo_counts()
    photo_columns = {
        "larvae_photo": larvae_photo,
        "watersource_photo": watersource_photo,
        "abdomen_photo": abdomen_photo,
    }

    for param_name, column_name in photo_columns.items():
        if not column_name:
            continue
        url_type = _format_param_name(param_name)
        # Without a species column every row gets a genus-only classification.
        species_values = mhm_df[species_col] if species_col else [None] * len(mhm_df)

        # A plain loop visits every row exactly once and copes with an empty
        # DataFrame; np.vectorize without otypes does neither.
        rows = zip(
            mhm_df[column_name],
            mhm_df[latitude_col],
            mhm_df[longitude_col],
            mhm_df[watersource_col],
            mhm_df[date_col],
            mhm_df[id_col],
            mhm_df[genus_col],
            species_values,
        )
        for (
            url_entry,
            latitude,
            longitude,
            watersource,
            date,
            mhm_id,
            genus,
            species,
        ) in rows:
            if pd.isna(url_entry):
                continue

            date_str = pd.to_datetime(str(date)).strftime("%Y-%m-%d")
            for url in url_entry.split(";"):
                status, photo_id = _classify_photo_url(url)
                if status != "valid":
                    num_invalid_photos[status] += 1
                    continue

                name_fields = {
                    "url_type": url_type,
                    "watersource": watersource,
                    "latitude": round(latitude, 5),
                    "longitude": round(longitude, 5),
                    "date_str": date_str,
                    "mhm_id": mhm_id,
                    "classification": _get_mosquito_classification(genus, species),
                }
                name = _build_photo_name(
                    "mhm_",
                    photo_id,
                    name_fields,
                    include_in_name,
                    additional_name_stem,
                )
                targets.add((url, directory, name, resolution))

    _warn_num_invalid_photos(num_invalid_photos)
    return targets


def download_mhm_photos(
    mhm_df,
    directory,
    latitude_col="mhm_Latitude",
    longitude_col="mhm_Longitude",
    watersource_col="mhm_WaterSource",
    date_col="mhm_measuredDate",
    id_col="mhm_MosquitoHabitatMapperId",
    genus_col="mhm_Genus",
    species_col="mhm_Species",
    larvae_photo="mhm_LarvaFullBodyPhotoUrls",
    watersource_photo="mhm_WaterSourcePhotoUrls",
    abdomen_photo="mhm_AbdomenCloseupPhotoUrls",
    include_in_name=None,
    additional_name_stem="",
    resolution=None,
):
    """
    Downloads mosquito habitat mapper photos

    Parameters
    ----------
    mhm_df : pd.DataFrame
        Mosquito Habitat Mapper Data
    directory : str
        The directory to save the photos
    latitude_col : str, default="mhm_Latitude"
        The column name of the column that contains the Latitude
    longitude_col : str, default="mhm_Longitude"
        The column name of the column that contains the Longitude
    watersource_col : str, default = "mhm_WaterSource"
        The column name of the column that contains the watersource
    date_col : str, default = "mhm_measuredDate"
        The column name of the column that contains the measured date
    id_col : str, default = "mhm_MosquitoHabitatMapperId"
        The column name of the column that contains the mosquito habitat mapper id
    genus_col : str, default = "mhm_Genus"
        The column name of the column that contains the genus
    species_col : str, default = "mhm_Species"
        The column name of the column that contains the species. If not specified, the classification consists of the genus only.
    larvae_photo : str, default = "mhm_LarvaFullBodyPhotoUrls"
        The column name of the column that contains the larvae photo urls. If not specified, the larvae photos will not be included.
    watersource_photo : str, default = "mhm_WaterSourcePhotoUrls"
        The column name of the column that contains the watersource photo urls. If not specified, the watersource photos will not be included.
    abdomen_photo : str, default = "mhm_AbdomenCloseupPhotoUrls"
        The column name of the column that contains the abdomen photo urls. If not specified, the abdomen photos will not be included.
    include_in_name : list of str, default=None
        A list of field names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
        Accepted Included Names include:
        * `url_type` -- Type of photo (e.g. Watersource, Larvae, Abdomen)
        * `watersource` -- Watersource for the observed mosquito habitat
        * `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
        * `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
        * `date_str` -- Measured date formatted as YYYY-MM-DD
        * `mhm_id` -- Unique ID for a MHM observation
        * `classification` -- Mosquito classification (or `"None"` if no classification available)
    additional_name_stem : str, default=""
        Additional custom information the user can add to the name.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.

    Returns
    -------
    set of tuple
        Contains the (url, directory, filename, resolution) for each desired mosquito habitat mapper photo
    """
    targets = get_mhm_download_targets(
        mhm_df,
        directory,
        latitude_col=latitude_col,
        longitude_col=longitude_col,
        watersource_col=watersource_col,
        date_col=date_col,
        id_col=id_col,
        genus_col=genus_col,
        species_col=species_col,
        larvae_photo=larvae_photo,
        watersource_photo=watersource_photo,
        abdomen_photo=abdomen_photo,
        include_in_name=include_in_name,
        additional_name_stem=additional_name_stem,
        resolution=resolution,
    )
    download_all_photos(targets)
    return targets


def get_lc_download_targets(
    lc_df,
    directory,
    latitude_col="lc_Latitude",
    longitude_col="lc_Longitude",
    date_col="lc_measuredDate",
    id_col="lc_LandCoverId",
    up_photo="lc_UpwardPhotoUrl",
    down_photo="lc_DownwardPhotoUrl",
    north_photo="lc_NorthPhotoUrl",
    south_photo="lc_SouthPhotoUrl",
    east_photo="lc_EastPhotoUrl",
    west_photo="lc_WestPhotoUrl",
    include_in_name=None,
    additional_name_stem="",
    resolution=None,
):
    """
    Generates landcover targets to download

    Each photo column holds one url per row. Urls that cannot be downloaded
    (including missing entries) are counted and reported in a single warning.

    Parameters
    ----------
    lc_df : pd.DataFrame
        Cleaned and Flagged Landcover Data
    directory : str
        The directory to save the photos
    latitude_col : str, default="lc_Latitude"
        The column name of the column that contains the Latitude
    longitude_col : str, default="lc_Longitude"
        The column name of the column that contains the Longitude
    date_col : str, default="lc_measuredDate"
        The column name of the column that contains the measured date
    id_col : str, default="lc_LandCoverId"
        The column name of the column that contains the landcover id
    up_photo : str, default = "lc_UpwardPhotoUrl"
        The column name of the column that contains the upward photo urls. If not specified, these photos will not be included.
    down_photo : str, default = "lc_DownwardPhotoUrl"
        The column name of the column that contains the downward photo urls. If not specified, these photos will not be included.
    north_photo : str, default = "lc_NorthPhotoUrl"
        The column name of the column that contains the north photo urls. If not specified, these photos will not be included.
    south_photo : str, default = "lc_SouthPhotoUrl"
        The column name of the column that contains the south photo urls. If not specified, these photos will not be included.
    east_photo : str, default = "lc_EastPhotoUrl"
        The column name of the column that contains the east photo urls. If not specified, these photos will not be included.
    west_photo : str, default = "lc_WestPhotoUrl"
        The column name of the column that contains the west photo urls. If not specified, these photos will not be included.
    include_in_name : list of str, default=None
        A list of field names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
        Accepted Included Names include:
        * `direction` -- Direction where the photo was taken (e.g. North, South, East, West, Up, Down)
        * `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
        * `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
        * `date_str` -- Measured date formatted as YYYY-MM-DD
        * `lc_id` -- Unique ID for a LC observation
    additional_name_stem : str, default=""
        Additional custom information the user can add to the name.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.

    Returns
    -------
    set of tuple
        Contains the (url, directory, filename, resolution) for each desired land cover photo
    """
    targets = set()
    num_invalid_photos = _new_invalid_photo_counts()
    photo_columns = {
        "up_photo": up_photo,
        "down_photo": down_photo,
        "north_photo": north_photo,
        "south_photo": south_photo,
        "east_photo": east_photo,
        "west_photo": west_photo,
    }

    for param_name, column_name in photo_columns.items():
        if not column_name:
            continue
        direction = _format_param_name(param_name)

        # A plain loop visits every row exactly once and copes with an empty
        # DataFrame; np.vectorize without otypes does neither.
        rows = zip(
            lc_df[column_name],
            lc_df[latitude_col],
            lc_df[longitude_col],
            lc_df[date_col],
            lc_df[id_col],
        )
        for url, latitude, longitude, date, lc_id in rows:
            status, photo_id = _classify_photo_url(url)
            if status != "valid":
                num_invalid_photos[status] += 1
                continue

            name_fields = {
                "direction": direction,
                "latitude": round(latitude, 5),
                "longitude": round(longitude, 5),
                "date_str": pd.to_datetime(str(date)).strftime("%Y-%m-%d"),
                "lc_id": lc_id,
            }
            name = _build_photo_name(
                "lc_",
                photo_id,
                name_fields,
                include_in_name,
                additional_name_stem,
            )
            targets.add((url, directory, name, resolution))

    _warn_num_invalid_photos(num_invalid_photos)
    return targets


def download_lc_photos(
    lc_df,
    directory,
    latitude_col="lc_Latitude",
    longitude_col="lc_Longitude",
    date_col="lc_measuredDate",
    id_col="lc_LandCoverId",
    up_photo="lc_UpwardPhotoUrl",
    down_photo="lc_DownwardPhotoUrl",
    north_photo="lc_NorthPhotoUrl",
    south_photo="lc_SouthPhotoUrl",
    east_photo="lc_EastPhotoUrl",
    west_photo="lc_WestPhotoUrl",
    include_in_name=None,
    additional_name_stem="",
    resolution=None,
):
    """
    Downloads Landcover photos for landcover data.

    Parameters
    ----------
    lc_df : pd.DataFrame
        Cleaned and Flagged Landcover Data
    directory : str
        The directory to save the photos
    latitude_col : str, default="lc_Latitude"
        The column name of the column that contains the Latitude
    longitude_col : str, default="lc_Longitude"
        The column name of the column that contains the Longitude
    date_col : str, default="lc_measuredDate"
        The column name of the column that contains the measured date
    id_col : str, default="lc_LandCoverId"
        The column name of the column that contains the landcover id
    up_photo : str, default = "lc_UpwardPhotoUrl"
        The column name of the column that contains the upward photo urls. If not specified, these photos will not be included.
    down_photo : str, default = "lc_DownwardPhotoUrl"
        The column name of the column that contains the downward photo urls. If not specified, these photos will not be included.
    north_photo : str, default = "lc_NorthPhotoUrl"
        The column name of the column that contains the north photo urls. If not specified, these photos will not be included.
    south_photo : str, default = "lc_SouthPhotoUrl"
        The column name of the column that contains the south photo urls. If not specified, these photos will not be included.
    east_photo : str, default = "lc_EastPhotoUrl"
        The column name of the column that contains the east photo urls. If not specified, these photos will not be included.
    west_photo : str, default = "lc_WestPhotoUrl"
        The column name of the column that contains the west photo urls. If not specified, these photos will not be included.
    include_in_name : list of str, default=None
        A list of field names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
        Accepted Included Names include:
        * `direction` -- Direction where the photo was taken (e.g. North, South, East, West, Up, Down)
        * `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
        * `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
        * `date_str` -- Measured date formatted as YYYY-MM-DD
        * `lc_id` -- Unique ID for a LC observation
    additional_name_stem : str, default=""
        Additional custom information the user can add to the name.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.

    Returns
    -------
    set of tuple
        Contains the (url, directory, filename, resolution) for each desired land cover photo
    """
    targets = get_lc_download_targets(
        lc_df,
        directory,
        latitude_col=latitude_col,
        longitude_col=longitude_col,
        date_col=date_col,
        id_col=id_col,
        up_photo=up_photo,
        down_photo=down_photo,
        north_photo=north_photo,
        south_photo=south_photo,
        east_photo=east_photo,
        west_photo=west_photo,
        include_in_name=include_in_name,
        additional_name_stem=additional_name_stem,
        resolution=resolution,
    )
    download_all_photos(targets)
    return targets
def
get_globe_photo_id(url: str)
def get_globe_photo_id(url: str):
    """
    Gets the GLOBE Photo ID from a url

    Parameters
    ----------
    url : str
        A url to a GLOBE Observer Image

    Returns
    -------
    str or None
        The path segment that directly follows the ``YYYY/MM/DD/`` part of the
        url, or None if the url is missing or does not contain such a segment.
    """
    if pd.isna(url):
        return None
    # Match exactly one non-empty path segment after the date. A greedy ".*"
    # would swallow any further segments (e.g. "2251132/large"), and callers
    # convert the result with int().
    match_obj = re.search(r"(?<=\d{4}/\d{2}/\d{2}/)[^/]+(?=/)", url)
    if match_obj:
        return match_obj.group(0)
    return None
Gets the GLOBE Photo ID from a url
Parameters
- url (str): A url to a GLOBE Observer Image
def
remove_bad_characters(filename: str)
def remove_bad_characters(filename: str):
    """
    Strips characters that are not allowed in filenames.

    The `/` character is stripped too: the argument is assumed to be a bare
    filename, not a path that uses that symbol to separate directories.

    Parameters
    ----------
    filename : str
        A possible filename.

    Returns
    -------
    str
        The filename without any erroneous characters (None if the filename is missing)
    """
    forbidden = r"[<>:?\"/\\|*]"
    return None if pd.isna(filename) else re.sub(forbidden, "", filename)
Removes characters that are not allowed in filenames. The / character is removed as well,
because the argument is assumed to be a bare filename rather than a path that may use that symbol to separate directories.
Parameters
- filename (str): A possible filename.
Returns
- str: The filename without any erroneous characters
def
download_photo(url: str, directory: str, filename: str, resolution=None)
def download_photo(url: str, directory: str, filename: str, resolution=None):
    """
    Downloads a photo to a directory.

    A warning is issued (and nothing is downloaded) if the url, directory, or
    filename is missing, or if the server answers with an HTTP error status.

    Parameters
    ----------
    url : str
        The URL to the photo
    directory : str
        The directory that the photo should be saved in. It is created (including parent directories) if it does not exist.
    filename : str
        The name of the photo
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.
    """
    if any(pd.isna(x) for x in (url, directory, filename)):
        msg = f"Either url ({url}), directory ({directory}), or filename ({filename}) was None."
        warnings.warn(msg)
        return

    out_path = os.path.join(directory, remove_bad_characters(filename))

    # Sized values such as (1920, 1080) or [1920, 1080] are real resolutions.
    # pd.isna is only asked about scalars because it returns an array for lists.
    keep_original = resolution is None or (
        not hasattr(resolution, "__len__") and bool(pd.isna(resolution))
    )

    if keep_original:
        # Only fetch here when the original file is wanted; the resizing path
        # performs its own download, so fetching first would download twice.
        response = requests.get(url, allow_redirects=True)
        if not response.ok:
            # Do not store an error page under an image filename.
            warnings.warn(f"{url} failed: HTTP status {response.status_code}")
            return
        os.makedirs(directory, exist_ok=True)
        with open(out_path, "wb") as file:
            file.write(response.content)
    else:
        os.makedirs(directory, exist_ok=True)
        get_img_at_resolution(url, out_path, resolution)
Downloads a photo to a directory.
Parameters
- url (str): The URL to the photo
- directory (str): The directory that the photo should be saved in
- filename (str): The name of the photo
- resolution (tuple of int, default = None): The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.
def
get_img_at_resolution(url, path, resolution)
def get_img_at_resolution(url, path, resolution):
    """
    Downloads an image from a url and saves it at a specified resolution

    The download is attempted at most twice. Failures are reported through
    warnings rather than raised.

    Parameters
    ----------
    url : str
        An image URL
    path : str
        The filepath to save the image to
    resolution : tuple of int
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image.
    """

    def get_img():
        # The context manager releases the streamed connection even when
        # decoding or saving fails.
        with requests.get(url, stream=True) as response:
            # Surface HTTP errors directly instead of as an image-decoding failure.
            response.raise_for_status()
            # Let urllib3 undo any gzip/deflate content encoding before PIL reads the bytes.
            response.raw.decode_content = True
            with Image.open(response.raw) as img:
                img.resize(resolution).save(path)

    try:
        get_img()
    except Exception as e:  # Sometimes the image download fails and it has to be rerun
        warnings.warn(f"{url} failed due to {repr(e)}, retrying...")
        try:
            get_img()
            warnings.warn("retry successful")
        except Exception as e:
            warnings.warn(f"{url} failed: {repr(e)}")
Downloads an image from a url at a specified resolution
Parameters
- url (str): An image URL
- path (str): The filepath to save the image to
- resolution (tuple of int): The image resolution in width x height. e.g. (1920, 1080) for a 1080p image.
def
download_all_photos(targets)
def download_all_photos(targets):
    """
    Downloads all photos given a collection of targets, which are tuples containing the url, directory, filename, and resolution.

    Parameters
    ----------
    targets : list or set of tuple
        Contains tuples that store the url, directory, filename, and resolution (will be None to get original photo resolution) of the desired photos to be downloaded in that order. Entries that are not 4-tuples are skipped with a warning.
    """
    expected_num_params = 4
    # pd.isna returns an array for list inputs, whose truth value is ambiguous,
    # so it is only consulted for non-iterable scalars such as NaN.
    if targets is None or (not hasattr(targets, "__iter__") and pd.isna(targets)):
        warnings.warn("Targets was none")
        return

    for target in targets:
        if isinstance(target, tuple) and len(target) == expected_num_params:
            download_photo(*target)
        else:
            warnings.warn(f"Target incorrectly formatted: {target}")
Downloads all photos given a collection of targets, which are tuples containing the url, directory, filename, and resolution.
Parameters
- targets (list of tuple of str): Contains tuples that store the url, directory, filename, and resolution (will be None to get original photo resolution) of the desired photos to be downloaded in that order.
def
get_mhm_download_targets( mhm_df, directory, latitude_col='mhm_Latitude', longitude_col='mhm_Longitude', watersource_col='mhm_WaterSource', date_col='mhm_measuredDate', id_col='mhm_MosquitoHabitatMapperId', genus_col='mhm_Genus', species_col='mhm_Species', larvae_photo='mhm_LarvaFullBodyPhotoUrls', watersource_photo='mhm_WaterSourcePhotoUrls', abdomen_photo='mhm_AbdomenCloseupPhotoUrls', include_in_name=[], additional_name_stem='', resolution=None)
def get_mhm_download_targets(
    mhm_df,
    directory,
    latitude_col="mhm_Latitude",
    longitude_col="mhm_Longitude",
    watersource_col="mhm_WaterSource",
    date_col="mhm_measuredDate",
    id_col="mhm_MosquitoHabitatMapperId",
    genus_col="mhm_Genus",
    species_col="mhm_Species",
    larvae_photo="mhm_LarvaFullBodyPhotoUrls",
    watersource_photo="mhm_WaterSourcePhotoUrls",
    abdomen_photo="mhm_AbdomenCloseupPhotoUrls",
    include_in_name=None,
    additional_name_stem="",
    resolution=None,
):
    """
    Generates mosquito habitat mapper targets to download

    Each photo column may hold several urls separated by ``;``. Missing
    entries are ignored; urls that cannot be downloaded are counted and
    reported in a single warning.

    Parameters
    ----------
    mhm_df : pd.DataFrame
        Mosquito Habitat Mapper Data
    directory : str
        The directory to save the photos
    latitude_col : str, default="mhm_Latitude"
        The column name of the column that contains the Latitude
    longitude_col : str, default="mhm_Longitude"
        The column name of the column that contains the Longitude
    watersource_col : str, default = "mhm_WaterSource"
        The column name of the column that contains the watersource
    date_col : str, default = "mhm_measuredDate"
        The column name of the column that contains the measured date
    id_col : str, default = "mhm_MosquitoHabitatMapperId"
        The column name of the column that contains the mosquito habitat mapper id
    genus_col : str, default = "mhm_Genus"
        The column name of the column that contains the genus
    species_col : str, default = "mhm_Species"
        The column name of the column that contains the species. If not specified, the classification consists of the genus only.
    larvae_photo : str, default = "mhm_LarvaFullBodyPhotoUrls"
        The column name of the column that contains the larvae photo urls. If not specified, the larvae photos will not be included.
    watersource_photo : str, default = "mhm_WaterSourcePhotoUrls"
        The column name of the column that contains the watersource photo urls. If not specified, the watersource photos will not be included.
    abdomen_photo : str, default = "mhm_AbdomenCloseupPhotoUrls"
        The column name of the column that contains the abdomen photo urls. If not specified, the abdomen photos will not be included.
    include_in_name : list of str, default=None
        A list of field names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
        Accepted Included Names include:
        * `url_type` -- Type of photo (e.g. Watersource, Larvae, Abdomen)
        * `watersource` -- Watersource for the observed mosquito habitat
        * `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
        * `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
        * `date_str` -- Measured date formatted as YYYY-MM-DD
        * `mhm_id` -- Unique ID for a MHM observation
        * `classification` -- Mosquito classification (or `"None"` if no classification available)
    additional_name_stem : str, default=""
        Additional custom information the user can add to the name.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.

    Returns
    -------
    set of tuple
        Contains the (url, directory, filename, resolution) for each desired mosquito habitat mapper photo
    """
    targets = set()
    num_invalid_photos = {
        "invalid_URL": 0,
        "rejected": 0,
        "pending": 0,
        "bad_photo_id": 0,
    }

    def is_valid_photo_id(photo_id):
        # A missing or non-numeric ID is counted as a bad ID instead of raising.
        if pd.isna(photo_id):
            return False
        try:
            return int(photo_id) >= 0
        except (TypeError, ValueError):
            return False

    # Listed explicitly instead of being filtered out of locals() by name.
    photo_columns = {
        "larvae_photo": larvae_photo,
        "watersource_photo": watersource_photo,
        "abdomen_photo": abdomen_photo,
    }

    for param_name, column_name in photo_columns.items():
        if not column_name:
            continue
        url_type = _format_param_name(param_name)
        # Without a species column every row gets a genus-only classification.
        species_values = mhm_df[species_col] if species_col else [None] * len(mhm_df)

        # A plain loop visits every row exactly once and copes with an empty
        # DataFrame. np.vectorize without otypes calls the function an extra
        # time on the first row (double-counting invalid photos) and raises
        # on size-0 input.
        rows = zip(
            mhm_df[column_name],
            mhm_df[latitude_col],
            mhm_df[longitude_col],
            mhm_df[watersource_col],
            mhm_df[date_col],
            mhm_df[id_col],
            mhm_df[genus_col],
            species_values,
        )
        for (
            url_entry,
            latitude,
            longitude,
            watersource,
            date,
            mhm_id,
            genus,
            species,
        ) in rows:
            if pd.isna(url_entry):
                continue

            date_str = pd.to_datetime(str(date)).strftime("%Y-%m-%d")
            for url in url_entry.split(";"):
                if "https" not in url:
                    if "rejected" in url:
                        num_invalid_photos["rejected"] += 1
                    elif "pending" in url:
                        num_invalid_photos["pending"] += 1
                    else:
                        num_invalid_photos["invalid_URL"] += 1
                    continue

                photo_id = get_globe_photo_id(url)
                if not is_valid_photo_id(photo_id):
                    num_invalid_photos["bad_photo_id"] += 1
                    continue

                name_fields = {
                    "url_type": url_type,
                    "watersource": watersource,
                    "latitude": round(latitude, 5),
                    "longitude": round(longitude, 5),
                    "date_str": date_str,
                    "mhm_id": mhm_id,
                    "classification": _get_mosquito_classification(genus, species),
                }
                name = _build_photo_name(
                    "mhm_",
                    photo_id,
                    name_fields,
                    include_in_name,
                    additional_name_stem,
                )
                targets.add((url, directory, name, resolution))

    _warn_num_invalid_photos(num_invalid_photos)
    return targets
Generates mosquito habitat mapper targets to download
Parameters
- mhm_df (pd.DataFrame): Mosquito Habitat Mapper Data
- directory (str): The directory to save the photos
- latitude_col (str, default="mhm_Latitude"): The column name of the column that contains the Latitude
- longitude_col (str, default="mhm_Longitude"): The column name of the column that contains the Longitude
- watersource_col (str, default = "mhm_WaterSource"): The column name of the column that contains the watersource
- date_col (str, default = "mhm_measuredDate"): The column name of the column that contains the measured date
- id_col (str, default = "mhm_MosquitoHabitatMapperId"): The column name of the column that contains the mosquito habitat mapper id
- genus_col (str, default = "mhm_Genus"): The column name of the column that contains the genus
- species_col (str, default = "mhm_Species"): The column name of the column that contains the species
- larvae_photo (str, default = "mhm_LarvaFullBodyPhotoUrls"): The column name of the column that contains the larvae photo urls. If not specified, the larvae photos will not be included.
- watersource_photo (str, default = "mhm_WaterSourcePhotoUrls"): The column name of the column that contains the watersource photo urls. If not specified, the watersource photos will not be included.
- abdomen_photo (str, default = "mhm_AbdomenCloseupPhotoUrls"): The column name of the column that contains the abdomen photo urls. If not specified, the abdomen photos will not be included.
- include_in_name (list of str, default=[]):
A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
Accepted Included Names include:
* `url_type` -- Type of photo (e.g. Watersource, Larvae, Abdomen)
* `watersource` -- Watersource for the observed mosquito habitat
* `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
* `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
* `date_str` -- Date Range expressed as a String
* `mhm_id` -- Unique ID for a MHM observation
* `classification` -- Mosquito classification (or `"None"` if no classification available)
- additional_name_stem (str, default=""): Additional custom information the user can add to the name.
- resolution (tuple of int, default = None): The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.
Returns
- set of tuple: Contains the (url, directory, filename, resolution) for each desired mosquito habitat mapper photo
def
download_mhm_photos( mhm_df, directory, latitude_col='mhm_Latitude', longitude_col='mhm_Longitude', watersource_col='mhm_WaterSource', date_col='mhm_measuredDate', id_col='mhm_MosquitoHabitatMapperId', genus_col='mhm_Genus', species_col='mhm_Species', larvae_photo='mhm_LarvaFullBodyPhotoUrls', watersource_photo='mhm_WaterSourcePhotoUrls', abdomen_photo='mhm_AbdomenCloseupPhotoUrls', include_in_name=[], additional_name_stem='', resolution=None)
def download_mhm_photos(
    mhm_df,
    directory,
    latitude_col="mhm_Latitude",
    longitude_col="mhm_Longitude",
    watersource_col="mhm_WaterSource",
    date_col="mhm_measuredDate",
    id_col="mhm_MosquitoHabitatMapperId",
    genus_col="mhm_Genus",
    species_col="mhm_Species",
    larvae_photo="mhm_LarvaFullBodyPhotoUrls",
    watersource_photo="mhm_WaterSourcePhotoUrls",
    abdomen_photo="mhm_AbdomenCloseupPhotoUrls",
    include_in_name=[],
    additional_name_stem="",
    resolution=None,
):
    """
    Builds the Mosquito Habitat Mapper download targets and downloads them.

    Every argument is forwarded unchanged to `get_mhm_download_targets`; the
    resulting targets are then handed to `download_all_photos`.

    Parameters
    ----------
    mhm_df : pd.DataFrame
        Mosquito Habitat Mapper Data
    directory : str
        The directory to save the photos
    latitude_col : str, default="mhm_Latitude"
        The column name of the column that contains the Latitude
    longitude_col : str, default="mhm_Longitude"
        The column name of the column that contains the Longitude
    watersource_col : str, default = "mhm_WaterSource"
        The column name of the column that contains the watersource
    date_col : str, default = "mhm_measuredDate"
        The column name of the column that contains the measured date
    id_col : str, default = "mhm_MosquitoHabitatMapperId"
        The column name of the column that contains the mosquito habitat mapper id
    genus_col : str, default = "mhm_Genus"
        The column name of the column that contains the genus
    species_col : str, default = "mhm_Species"
        The column name of the column that contains the species
    larvae_photo : str, default = "mhm_LarvaFullBodyPhotoUrls"
        The column name of the column that contains the larvae photo urls. If not specified, the larvae photos will not be included.
    watersource_photo : str, default = "mhm_WaterSourcePhotoUrls"
        The column name of the column that contains the watersource photo urls. If not specified, the watersource photos will not be included.
    abdomen_photo : str, default = "mhm_AbdomenCloseupPhotoUrls"
        The column name of the column that contains the abdomen photo urls. If not specified, the abdomen photos will not be included.
    include_in_name : list of str, default=[]
        A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name.
        Accepted Included Names include:
        * `url_type` -- Type of photo (e.g. Watersource, Larvae, Abdomen)
        * `watersource` -- Watersource for the observed mosquito habitat
        * `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
        * `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
        * `date_str` -- Measured date formatted as YYYY-MM-DD
        * `mhm_id` -- Unique ID for a MHM observation
        * `classification` -- Mosquito classification (or `"None"` if no classification available)
    additional_name_stem : str, default=""
        Additional custom information the user can add to the name.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.

    Returns
    -------
    set of tuple
        Contains the (url, directory, filename, resolution) for each desired mosquito habitat mapper photo
    """
    # Forward every parameter by name so the call does not depend on the
    # exact contents of this function's local namespace.
    download_targets = get_mhm_download_targets(
        mhm_df=mhm_df,
        directory=directory,
        latitude_col=latitude_col,
        longitude_col=longitude_col,
        watersource_col=watersource_col,
        date_col=date_col,
        id_col=id_col,
        genus_col=genus_col,
        species_col=species_col,
        larvae_photo=larvae_photo,
        watersource_photo=watersource_photo,
        abdomen_photo=abdomen_photo,
        include_in_name=include_in_name,
        additional_name_stem=additional_name_stem,
        resolution=resolution,
    )
    download_all_photos(download_targets)
    return download_targets
Downloads mosquito habitat mapper photos
Parameters
- mhm_df (pd.DataFrame): Mosquito Habitat Mapper Data
- directory (str): The directory to save the photos
- latitude_col (str, default="mhm_Latitude"): The column name of the column that contains the Latitude
- longitude_col (str, default="mhm_Longitude"): The column name of the column that contains the Longitude
- watersource_col (str, default = "mhm_WaterSource"): The column name of the column that contains the watersource
- date_col (str, default = "mhm_measuredDate"): The column name of the column that contains the measured date
- id_col (str, default = "mhm_MosquitoHabitatMapperId"): The column name of the column that contains the mosquito habitat mapper id
- genus_col (str, default = "mhm_Genus"): The column name of the column that contains the genus
- species_col (str, default = "mhm_Species"): The column name of the column that contains the species
- larvae_photo (str, default = "mhm_LarvaFullBodyPhotoUrls"): The column name of the column that contains the larvae photo urls. If not specified, the larvae photos will not be included.
- watersource_photo (str, default = "mhm_WaterSourcePhotoUrls"): The column name of the column that contains the watersource photo urls. If not specified, the watersource photos will not be included.
- abdomen_photo (str, default = "mhm_AbdomenCloseupPhotoUrls"): The column name of the column that contains the abdomen photo urls. If not specified, the abdomen photos will not be included.
- include_in_name (list of str, default=[]):
A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name.
Accepted Included Names include:
* `url_type` -- Type of photo (e.g. Watersource, Larvae, Abdomen)
* `watersource` -- Watersource for the observed mosquito habitat
* `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
* `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
* `date_str` -- Date Range expressed as a String
* `mhm_id` -- Unique ID for a MHM observation
* `classification` -- Mosquito classification (or `"None"` if no classification available)
- additional_name_stem (str, default=""): Additional custom information the user can add to the name.
- resolution (tuple of int, default = None): The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.
Returns
- set of tuple: Contains the (url, directory, filename, resolution) for each desired mosquito habitat mapper photo
def
get_lc_download_targets( lc_df, directory, latitude_col='lc_Latitude', longitude_col='lc_Longitude', date_col='lc_measuredDate', id_col='lc_LandCoverId', up_photo='lc_UpwardPhotoUrl', down_photo='lc_DownwardPhotoUrl', north_photo='lc_NorthPhotoUrl', south_photo='lc_SouthPhotoUrl', east_photo='lc_EastPhotoUrl', west_photo='lc_WestPhotoUrl', include_in_name=[], additional_name_stem='', resolution=None)
def get_lc_download_targets(
    lc_df,
    directory,
    latitude_col="lc_Latitude",
    longitude_col="lc_Longitude",
    date_col="lc_measuredDate",
    id_col="lc_LandCoverId",
    up_photo="lc_UpwardPhotoUrl",
    down_photo="lc_DownwardPhotoUrl",
    north_photo="lc_NorthPhotoUrl",
    south_photo="lc_SouthPhotoUrl",
    east_photo="lc_EastPhotoUrl",
    west_photo="lc_WestPhotoUrl",
    include_in_name=[],
    additional_name_stem="",
    resolution=None,
):
    """
    Generates landcover targets to download

    Each enabled photo column is scanned row by row. Entries containing
    "https" with a non-negative integer GLOBE photo id become targets; all
    other entries are tallied by reason (rejected, pending, invalid URL, bad
    photo id) and the tallies are passed to `_warn_num_invalid_photos`.

    Parameters
    ----------
    lc_df : pd.DataFrame
        Cleaned and Flagged Landcover Data
    directory : str
        The directory to save the photos
    latitude_col : str, default="lc_Latitude"
        The column name of the column that contains the Latitude
    longitude_col : str, default="lc_Longitude"
        The column name of the column that contains the Longitude
    date_col : str, default="lc_measuredDate"
        The column name of the column that contains the measured date
    id_col : str, default="lc_LandCoverId"
        The column name of the column that contains the landcover id
    up_photo : str, default = "lc_UpwardPhotoUrl"
        The column name of the column that contains the upward photo urls. If not specified, these photos will not be included.
    down_photo : str, default = "lc_DownwardPhotoUrl"
        The column name of the column that contains the downward photo urls. If not specified, these photos will not be included.
    north_photo : str, default = "lc_NorthPhotoUrl"
        The column name of the column that contains the north photo urls. If not specified, these photos will not be included.
    south_photo : str, default = "lc_SouthPhotoUrl"
        The column name of the column that contains the south photo urls. If not specified, these photos will not be included.
    east_photo : str, default = "lc_EastPhotoUrl"
        The column name of the column that contains the east photo urls. If not specified, these photos will not be included.
    west_photo : str, default = "lc_WestPhotoUrl"
        The column name of the column that contains the west photo urls. If not specified, these photos will not be included.
    include_in_name : list of str, default=[]
        A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
        Accepted Included Names include:
        * `direction` -- Direction where the photo was taken (e.g. North, South, East, West, Up, Down)
        * `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
        * `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
        * `date_str` -- Measured date formatted as YYYY-MM-DD
        * `lc_id` -- Unique ID for a LC observation
    additional_name_stem : str, default=""
        Additional custom information the user can add to the name.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.

    Returns
    -------
    set of tuple
        Contains the (url, directory, filename, resolution) for each desired land cover photo
    """
    # Listed explicitly (same names and order as the parameters) instead of
    # filtering locals() for "photo": that filter would also match unrelated
    # locals such as `num_invalid_photos` if the namespace snapshot were ever
    # refreshed after they are defined.
    photo_locations = {
        "up_photo": up_photo,
        "down_photo": down_photo,
        "north_photo": north_photo,
        "south_photo": south_photo,
        "east_photo": east_photo,
        "west_photo": west_photo,
    }
    targets = set()
    num_invalid_photos = {
        "invalid_URL": 0,
        "rejected": 0,
        "pending": 0,
        "bad_photo_id": 0,
    }

    def get_photo_args(url, latitude, longitude, direction, date, lc_id):
        if not pd.isna(url) and "https" in url:
            date_str = pd.to_datetime(str(date)).strftime("%Y-%m-%d")
            photo_id = get_globe_photo_id(url)

            name_fields = {
                "direction": direction,
                "latitude": round(latitude, 5),
                "longitude": round(longitude, 5),
                "date_str": date_str,
                "lc_id": lc_id,
            }

            # A non-numeric id is counted as a bad photo id rather than
            # aborting the whole scan with a ValueError from int().
            try:
                has_valid_id = not pd.isna(photo_id) and int(photo_id) >= 0
            except ValueError:
                has_valid_id = False

            if has_valid_id:
                protocol = "lc_"
                name = _build_photo_name(
                    protocol,
                    photo_id,
                    name_fields,
                    include_in_name,
                    additional_name_stem,
                )
                targets.add((url, directory, name, resolution))
            else:
                num_invalid_photos["bad_photo_id"] += 1
        elif not pd.isna(url) and "rejected" in url:
            num_invalid_photos["rejected"] += 1
        elif not pd.isna(url) and "pending" in url:
            num_invalid_photos["pending"] += 1
        else:
            num_invalid_photos["invalid_URL"] += 1

    # otypes is given explicitly: without it np.vectorize makes an extra trial
    # call on the first row to infer the output type (which double counts that
    # row in num_invalid_photos) and raises ValueError on empty input.
    get_lc_photo_args = np.vectorize(get_photo_args, otypes=[object])
    for param_name, column_name in photo_locations.items():
        if column_name:
            get_lc_photo_args(
                lc_df[column_name].to_numpy(),
                lc_df[latitude_col].to_numpy(),
                lc_df[longitude_col].to_numpy(),
                _format_param_name(param_name),
                lc_df[date_col],
                lc_df[id_col].to_numpy(),
            )
    _warn_num_invalid_photos(num_invalid_photos)
    return targets
Generates landcover targets to download
Parameters
- lc_df (pd.DataFrame): Cleaned and Flagged Landcover Data
- directory (str): The directory to save the photos
- latitude_col (str, default="lc_Latitude"): The column name of the column that contains the Latitude
- longitude_col (str, default="lc_Longitude"): The column name of the column that contains the Longitude
- date_col (str, default="lc_measuredDate"): The column name of the column that contains the measured date
- id_col (str, default="lc_LandCoverId"): The column name of the column that contains the landcover id
- up_photo (str, default = "lc_UpwardPhotoUrl"): The column name of the column that contains the upward photo urls. If not specified, these photos will not be included.
- down_photo (str, default = "lc_DownwardPhotoUrl"): The column name of the column that contains the downward photo urls. If not specified, these photos will not be included.
- north_photo (str, default = "lc_NorthPhotoUrl"): The column name of the column that contains the north photo urls. If not specified, these photos will not be included.
- south_photo (str, default = "lc_SouthPhotoUrl"): The column name of the column that contains the south photo urls. If not specified, these photos will not be included.
- east_photo (str, default = "lc_EastPhotoUrl"): The column name of the column that contains the east photo urls. If not specified, these photos will not be included.
- west_photo (str, default = "lc_WestPhotoUrl"): The column name of the column that contains the west photo urls. If not specified, these photos will not be included.
- include_in_name (list of str, default=[]):
A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
Accepted Included Names include:
* `direction` -- Direction where the photo was taken (e.g. North, South, East, West, Up, Down)
* `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
* `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
* `date_str` -- Date Range expressed as a String
* `lc_id` -- Unique ID for a LC observation
- additional_name_stem (str, default=""): Additional custom information the user can add to the name.
- resolution (tuple of int, default = None): The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.
Returns
- set of tuple: Contains the (url, directory, filename, resolution) for each desired land cover photo
def
download_lc_photos( lc_df, directory, latitude_col='lc_Latitude', longitude_col='lc_Longitude', date_col='lc_measuredDate', id_col='lc_LandCoverId', up_photo='lc_UpwardPhotoUrl', down_photo='lc_DownwardPhotoUrl', north_photo='lc_NorthPhotoUrl', south_photo='lc_SouthPhotoUrl', east_photo='lc_EastPhotoUrl', west_photo='lc_WestPhotoUrl', include_in_name=[], additional_name_stem='', resolution=None)
def download_lc_photos(
    lc_df,
    directory,
    latitude_col="lc_Latitude",
    longitude_col="lc_Longitude",
    date_col="lc_measuredDate",
    id_col="lc_LandCoverId",
    up_photo="lc_UpwardPhotoUrl",
    down_photo="lc_DownwardPhotoUrl",
    north_photo="lc_NorthPhotoUrl",
    south_photo="lc_SouthPhotoUrl",
    east_photo="lc_EastPhotoUrl",
    west_photo="lc_WestPhotoUrl",
    include_in_name=[],
    additional_name_stem="",
    resolution=None,
):
    """
    Builds the landcover download targets and downloads them.

    Every argument is forwarded unchanged to `get_lc_download_targets`; the
    resulting targets are then handed to `download_all_photos`.

    Parameters
    ----------
    lc_df : pd.DataFrame
        Cleaned and Flagged Landcover Data
    directory : str
        The directory to save the photos
    latitude_col : str, default="lc_Latitude"
        The column name of the column that contains the Latitude
    longitude_col : str, default="lc_Longitude"
        The column name of the column that contains the Longitude
    date_col : str, default="lc_measuredDate"
        The column name of the column that contains the measured date
    id_col : str, default="lc_LandCoverId"
        The column name of the column that contains the landcover id
    up_photo : str, default = "lc_UpwardPhotoUrl"
        The column name of the column that contains the upward photo urls. If not specified, these photos will not be included.
    down_photo : str, default = "lc_DownwardPhotoUrl"
        The column name of the column that contains the downward photo urls. If not specified, these photos will not be included.
    north_photo : str, default = "lc_NorthPhotoUrl"
        The column name of the column that contains the north photo urls. If not specified, these photos will not be included.
    south_photo : str, default = "lc_SouthPhotoUrl"
        The column name of the column that contains the south photo urls. If not specified, these photos will not be included.
    east_photo : str, default = "lc_EastPhotoUrl"
        The column name of the column that contains the east photo urls. If not specified, these photos will not be included.
    west_photo : str, default = "lc_WestPhotoUrl"
        The column name of the column that contains the west photo urls. If not specified, these photos will not be included.
    include_in_name : list of str, default=[]
        A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
        Accepted Included Names include:
        * `direction` -- Direction where the photo was taken (e.g. North, South, East, West, Up, Down)
        * `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
        * `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
        * `date_str` -- Date Range expressed as a String
        * `lc_id` -- Unique ID for a LC observation
    additional_name_stem : str, default=""
        Additional custom information the user can add to the name.
    resolution : tuple of int, default = None
        The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.

    Returns
    -------
    set of tuple of str
        The targets produced by `get_lc_download_targets` for each desired land cover photo
    """
    # Forward every parameter by name so the call does not depend on the
    # exact contents of this function's local namespace.
    download_targets = get_lc_download_targets(
        lc_df=lc_df,
        directory=directory,
        latitude_col=latitude_col,
        longitude_col=longitude_col,
        date_col=date_col,
        id_col=id_col,
        up_photo=up_photo,
        down_photo=down_photo,
        north_photo=north_photo,
        south_photo=south_photo,
        east_photo=east_photo,
        west_photo=west_photo,
        include_in_name=include_in_name,
        additional_name_stem=additional_name_stem,
        resolution=resolution,
    )
    download_all_photos(download_targets)
    return download_targets
Downloads Landcover photos for landcover data.
Parameters
- lc_df (pd.DataFrame): Cleaned and Flagged Landcover Data
- directory (str): The directory to save the photos
- latitude_col (str, default="lc_Latitude"): The column name of the column that contains the Latitude
- longitude_col (str, default="lc_Longitude"): The column name of the column that contains the Longitude
- date_col (str, default="lc_measuredDate"): The column name of the column that contains the measured date
- id_col (str, default="lc_LandCoverId"): The column name of the column that contains the landcover id
- up_photo (str, default = "lc_UpwardPhotoUrl"): The column name of the column that contains the upward photo urls. If not specified, these photos will not be included.
- down_photo (str, default = "lc_DownwardPhotoUrl"): The column name of the column that contains the downward photo urls. If not specified, these photos will not be included.
- north_photo (str, default = "lc_NorthPhotoUrl"): The column name of the column that contains the north photo urls. If not specified, these photos will not be included.
- south_photo (str, default = "lc_SouthPhotoUrl"): The column name of the column that contains the south photo urls. If not specified, these photos will not be included.
- east_photo (str, default = "lc_EastPhotoUrl"): The column name of the column that contains the east photo urls. If not specified, these photos will not be included.
- west_photo (str, default = "lc_WestPhotoUrl"): The column name of the column that contains the west photo urls. If not specified, these photos will not be included.
- include_in_name (list of str, default=[]):
A list of column names to include into the downloaded photo names. The order of items in this list is maintained in the outputted name
Accepted Included Names include:
* `direction` -- Direction where the photo was taken (e.g. North, South, East, West, Up, Down)
* `latitude` -- GPS Latitude Coordinate (rounded to 5 decimal places)
* `longitude` -- GPS Longitude Coordinate (rounded to 5 decimal places)
* `date_str` -- Date Range expressed as a String
* `lc_id` -- Unique ID for a LC observation
- additional_name_stem (str, default=""): Additional custom information the user can add to the name.
- resolution (tuple of int, default = None): The image resolution in width x height. e.g. (1920, 1080) for a 1080p image. If the resolution is None, the original resolution of the photo is downloaded.
Returns
- set of tuple: Contains the (url, directory, filename, resolution) for each desired land cover photo