# Backup Google Photos
# This program backs up all of the metadata displayed in the Google Photos "info" panel by iterating through each photo, starting from a specified location.
# Requirements: Python (with the re and time modules), chromedriver, selenium, selenium_stealth
# REQUIRES Selenium, selenium_stealth, and chromedriver installed on machine!
# pip install selenium
# pip install selenium_stealth
# Download latest chromedriver here: https://chromedriver.chromium.org/
# Put chromedriver in /usr/local/bin
# IMPORTANT: If this is your first time running this code, you will need to log in to your Google Account manually before you can back up your photos. Follow these steps:
#1 This program will create a directory called "Chrome" wherever this program is saved; the browser profile data is stored there. This allows the webdriver to stay logged in to your Google account. Keep this folder if you want to remain logged in.
#2 Run this program and log in to your Google account. Afterwards, navigate to Google Photos and copy the link of the photo where you want the scraper to start (this may be the first photo in your timeline, or the first photo in an album). Then set the "photos_start" variable equal to that link, choose the number of photos you want the program to loop over, and set the "logged_in" variable to True:
# URL of the first photo the scraper opens; it steps forward from here.
photos_start = '#LINK THE FIRST PHOTO WHERE THE SCRAPER SHOULD START#'
# Number of photos to step through, counting the starting photo.
NUM_PHOTOS = 25
# While False, the script only opens the Google Photos login page and waits
# so you can sign in manually; set to True to run the scraper.
logged_in = False
#3 Consider naming the CSV for the Google Photos data:
filename = "Google_Photos.csv"
#4 Google Photos' page code can behave inconsistently. It might take a couple of runs before the metadata fields are recognized, but once they are, it should run reliably afterwards.
import time
import re
from selenium import webdriver
from selenium_stealth import stealth
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
# Sets all the necessary options for Selenium to run properly. The
# user-data-dir / profile-directory pair points Chrome at a profile stored
# next to this script so the Google login survives between runs.
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument(r"--user-data-dir=Chrome/Default")  # Path to your chrome profile
options.add_argument(r'--profile-directory=Profile 3')

# Selenium 4 takes the chromedriver location through a Service object; the
# old ``executable_path`` keyword was removed in Selenium 4.10. Imported here
# so this setup section stays self-contained.
from selenium.webdriver.chrome.service import Service

chromedriver_path = r"/usr/local/bin/chromedriver"
try:
    driver = webdriver.Chrome(service=Service(chromedriver_path), options=options)
except TypeError:
    # Selenium 3 does not know the ``service`` keyword; use the legacy form.
    driver = webdriver.Chrome(options=options, executable_path=chromedriver_path)

# Sets the chromedriver to stealth mode so you can log in properly.
stealth(
    driver,
    languages=["en-US", "en"],
    vendor="Google Inc.",
    platform="macos",
    webgl_vendor="Intel Inc.",
    renderer="Intel Iris OpenGL Engine",
    fix_hairline=True,
)
# Creates (or overwrites) the CSV, writes the header row, and leaves the
# handle ``f`` open so each photo's row can be appended as it is scraped.
# UTF-8 is requested explicitly because names, places and album titles may
# contain characters the platform's default encoding cannot represent.
f = open(filename, "w", encoding="utf-8")
headers = "Photo, Title, Description, Date, Time, Location, Size, Dimentions, Faces, Albums \n"
f.write(headers)
# Push the header to disk right away so it is there even if the run aborts.
f.flush()
# Recovery routine used when a photo's info panel fails to load: it moves the
# viewer away from the photo and back again, then retries the lookup.
def error_solver(i, count):
    """Re-trigger the info panel for the current photo and retry begin_find.

    Args:
        i: Index of the photo being recorded; passed through to begin_find.
        count: Number of recovery attempts made so far. Below 1 the quick
            sequence runs; otherwise the slower "stubborn" sequence runs.
            begin_find is then called with ``count + 1``.
    """
    if count >= 1:
        # Stubborn case: step forward twice with longer pauses, then send
        # the LEFT key twice (the same queued action is performed twice).
        print('\x1b[6;30;42m' + "Solving Stubborn Error..." + '\x1b[0m')
        time.sleep(.75)
        go_next()
        time.sleep(.5)
        go_next()
        time.sleep(.5)
        step_back = ActionChains(driver)
        step_back.send_keys(Keys.LEFT)
        step_back.perform()
        step_back.perform()
        time.sleep(.5)
    else:
        # First attempt: one step back, then one step forward again.
        print(count)
        print('\x1b[6;30;42m' + "Solving Error..." + '\x1b[0m')
        time.sleep(.1)
        step_back = ActionChains(driver)
        step_back.send_keys(Keys.LEFT)
        step_back.perform()
        time.sleep(.1)
        go_next()

    begin_find(count + 1, i)
# First function that runs for each photo. It finds the info panel and picks
# out the section that belongs to the active (currently viewed) photo.
def begin_find(count, i):
    """Locate the info panel and hand the active photo's HTML to spreadsheet.

    The panel's innerHTML is cut into three consecutive pieces at the
    ``</c-data></c-wiz>`` boundaries. The first piece that is not hidden with
    ``style="display: none;"`` is treated as the active photo's data.

    Args:
        count: Number of recovery attempts already made for this photo.
        i: Index of the photo (the main loop counts from 1); it ends up in
            the first CSV column.

    Note:
        After the third failed attempt the browser window is closed and the
        function returns without raising.
    """
    hidden_marker = "style=\"display: none;\""
    section_end = "</c-data></c-wiz>"

    # Finding Info Group with XPATH
    time.sleep(0.25)
    info = './/*[@id="ow45"]/div[2]'
    WebDriverWait(driver, 5).until(EC.visibility_of_all_elements_located((By.XPATH, info)))
    # find_elements_by_xpath was removed in Selenium 4.3; the By-based lookup
    # works on both Selenium 3 and 4.
    for panel in driver.find_elements(By.XPATH, info):
        # Splitting the panel HTML into the 3 pieces
        text = panel.get_attribute("innerHTML")
        start = text.find("c-wiz")
        first_end = text.find(section_end) + len(section_end)
        first = text[start:first_end]
        remainder = text[first_end:]
        second_end = remainder.find(section_end) + len(section_end)
        second = remainder[:second_end]
        third = remainder[second_end:]

        # Which piece holds the active photo varies, so take the first
        # visible one.
        for section in (first, second, third):
            if hidden_marker not in section:
                spreadsheet(section, i, count)
                break
        else:
            # If no active data is found, there is a problem:
            print('\x1b[6;30;42m' + "Error: No Info found" + '\x1b[0m')
            time.sleep(.5)
            count += 1
            if count == 3:
                driver.close()
            else:
                error_solver(i, count)
# Since most photos have multiple **faces**, this function runs through all of
# the named ones and adds them to a single string.
def find_more_faces(face, temp_face, face_end):
    """Collect every additional face name that follows the first one.

    Args:
        face: The first face name, already extracted by the caller.
        temp_face: HTML text that starts at that first name.
        face_end: Index in ``temp_face`` where the first name ends.

    Returns:
        All names joined with ``"| "``, starting with ``face``.
    """
    opening = "<span class=\"Y8X4Pc\">"
    closing = "</span>"

    names = [face]
    remainder = temp_face[face_end:]
    while opening in remainder:
        candidate = remainder[remainder.find(opening) + len(opening):]
        name_end = candidate.find(closing)
        if name_end == -1:
            # Truncated markup: without a closing tag the name cannot be
            # delimited, so stop rather than record a clipped value.
            break
        names.append(candidate[:name_end])
        remainder = candidate[name_end:]
    return "| ".join(names)
# Since most photos are in multiple **albums**, this function runs through all
# of them and adds them to a single string.
def find_more_albums(album, temp_album, album_end):
    """Collect every additional album name that follows the first one.

    Args:
        album: The first album name, already extracted by the caller.
        temp_album: HTML text that starts at that first name.
        album_end: Index in ``temp_album`` where the first name ends.

    Returns:
        All names joined with ``"| "``, starting with ``album``.
    """
    opening = "<div class=\"AJM7gb\">"
    closing = "</div>"

    names = [album]
    remainder = temp_album[album_end:]
    while opening in remainder:
        candidate = remainder[remainder.find(opening) + len(opening):]
        name_end = candidate.find(closing)
        if name_end == -1:
            # Truncated markup: without a closing tag the name cannot be
            # delimited, so stop rather than record a clipped value.
            break
        names.append(candidate[:name_end])
        remainder = candidate[name_end:]
    return "| ".join(names)
def _text_between(html, opening, closing, skip=0):
    """Return the text after ``opening`` (plus ``skip`` chars) up to ``closing``.

    Returns None when either delimiter is missing, so callers can tell an
    absent field from one that is present but empty.
    """
    start = html.find(opening)
    if start == -1:
        return None
    tail = html[start + len(opening) + skip:]
    end = tail.find(closing)
    if end == -1:
        return None
    return tail[:end]


def _report(label, value):
    """Print ``label: value`` for a field that was found; map None to ""."""
    if value is None:
        return ""
    print(label + ": " + value)
    return value


def _joined_names(html, opening, closing, find_more):
    """Extract the first name between the delimiters and let ``find_more`` add the rest."""
    start = html.find(opening)
    if start == -1:
        return ""
    tail = html[start + len(opening):]
    end = tail.find(closing)
    if end == -1:
        return ""
    return find_more(tail[:end], tail, end)


# Finds the data to import into the spreadsheet (description, etc.)
def spreadsheet(a_substring, i, count):
    """Parse one photo's info-panel HTML and append a row to the CSV.

    Each field is cut out of ``a_substring`` by searching for the markup that
    precedes it. Fields that cannot be found are written as empty strings.
    Commas inside values are replaced with ``|`` so they do not split columns.

    Args:
        a_substring: HTML of the info-panel section for the active photo.
        i: Index of the photo; written to the first column.
        count: Number of recovery attempts so far; passed to error_solver
            when no filename is found.
    """
    label_end = "\">"

    title = _report(
        "Title",
        _text_between(a_substring, "aria-label=\"Filename:", label_end, skip=1),
    )

    description = _text_between(
        a_substring, "class=\"n4jc2d\" aria-hidden=\"true\">", "</div>"
    )
    if description is None:
        description = ""
    else:
        description = "\"" + description + "\""
        # The placeholder text means the photo has no description.
        if "Add a description" in description:
            description = ""
        print("Description: " + description)

    date = _report(
        "Date",
        _text_between(a_substring, "aria-label=\"Date:", "\" jsname=\"pG3jE\"", skip=1),
    )
    # Looked up by its own "Time:" label; the value is kept in ``taken_time``
    # so the ``time`` module is not shadowed.
    taken_time = _report(
        "Time",
        _text_between(a_substring, "aria-label=\"Time:", label_end, skip=1),
    )
    location = _report(
        "Location",
        _text_between(a_substring, "jsname=\"rdDVee\"><div class=\"R9U8ab\">", "</div>"),
    )
    size = _report(
        "Size",
        _text_between(a_substring, "aria-label=\"File size:", label_end, skip=1),
    )
    dimentions = _report(
        "Dimentions",
        _text_between(a_substring, "aria-label=\"Size:", label_end, skip=1),
    )

    all_faces = _joined_names(
        a_substring, "<span class=\"Y8X4Pc\">", "</span>", find_more_faces
    )
    print("Faces: " + all_faces)

    all_albums = _joined_names(
        a_substring, "<div class=\"AJM7gb\">", "</div>", find_more_albums
    )
    print("Albums: " + all_albums + "\n")

    # If there is no title (filename), the panel did not load properly, so
    # run the recovery routine instead of writing an empty row.
    if title == "":
        error_solver(i, count)
        return

    row = [
        str(i), title, description, date, taken_time,
        location, size, dimentions, all_faces, all_albums,
    ]
    f.write(",".join(value.replace(",", "|") for value in row) + "\n")
# Moves on to the next photo by pressing the right-arrow key.
def go_next():
    """Send a RIGHT key press to the browser, then pause for 0.25 s."""
    ActionChains(driver).send_keys(Keys.RIGHT).perform()
    time.sleep(0.25)
# Runs through the number of photos given by NUM_PHOTOS, then closes the CSV
# and shuts Chrome down. When not logged in yet, it opens the login page and
# waits instead so you can sign in manually.
try:
    if logged_in:
        driver.get(photos_start)
        time.sleep(1)
        for i in range(1, NUM_PHOTOS + 1):
            count = 0
            begin_find(count, i)
            go_next()
        print('\x1b[6;30;42m' + "Finished finding " + str(NUM_PHOTOS) + " photos!" + '\x1b[0m')
    else:
        driver.get("https://photos.google.com/login")
        # Long pause that keeps the browser open for the manual login.
        time.sleep(10000)
finally:
    # Always release the CSV handle and the browser, including when a photo
    # raises or the run is interrupted. quit() also stops the chromedriver
    # process, which close() leaves running.
    f.close()
    driver.quit()