Comment to Description
This copies comments on public albums into the description field for the photo. This way, comments for the photos from contributors to the data can be saved globally and show up everywhere.
Python (with re and time modules), chromedriver, selenium, selenium stealth
# REQUIRES Selenium, selenium_stealth, and chromedriver installed on machine!
# pip install selenium
# pip install selenium_stealth
# Download latest chromedriver here: https://chromedriver.chromium.org/
# Put chromedriver in /usr/local/bin
# IMPORTANT: If this is your first time running this code, you will need to log in to your Google Account manually before the script can access your photos. Follow these steps:
#1 This program will create a directory called "Chrome" wherever this program is saved; the profile data is stored there. This allows the webdriver to stay logged into your Google account. Keep this folder if you want to remain logged in.
#2 Run this program and log in to your Google account. Afterwards, navigate to Google Photos and copy the link of where you want to start the scraper (this may be an album with all the photos that need descriptions). Then, set the "photos_start" variable equal to that link, choose the number of photos you want the program to loop over, and set the "logged_in" variable to True:
# URL the browser opens first when logged_in is True; replace the placeholder with a real photo link.
photos_start = '#LINK THE FIRST PHOTO WHERE THE SCRAPER SHOULD START#'
# Number of photos the main loop steps through, starting at photos_start.
NUM_PHOTOS = 25
# False: only open the Google Photos login page and wait. True: run the scraper loop.
logged_in = False
#3 Google Photos' code can be weird sometimes. It might take a couple of runs before it starts to recognize the metadata fields, but once it does, it should run flawlessly afterwards.
import time
import re
from selenium import webdriver
from selenium_stealth import stealth
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
# Sets all the necessary options for Selenium to run properly.
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
# Drop the "enable-automation" switch and the automation extension from the launched browser.
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
# Relative path: the profile directory is resolved against the current working directory.
options.add_argument(r"--user-data-dir=Chrome/Default") #Path to your chrome profile
options.add_argument(r'--profile-directory=Profile 3')
# NOTE(review): `executable_path` is believed to be rejected by newer Selenium 4
# releases (replaced by a Service object) -- confirm against the installed version.
driver = webdriver.Chrome(options=options, executable_path=r"/usr/local/bin/chromedriver")
# Sets the chromedriver to stealth mode so you can log in properly.
# The values below are the browser properties selenium_stealth is asked to report.
stealth(driver,
languages=["en-US", "en"],
vendor="Google Inc.",
platform="macos",
webgl_vendor="Intel Inc.",
renderer="Intel Iris OpenGL Engine",
fix_hairline=True,
)
# Running count of photos visited by the main loop (incremented once per iteration).
# add_description reads it to choose which description textarea XPath to use.
photo_num = 0
def _press_left(times):
    """Queue one LEFT arrow key press and perform the queued action `times` times."""
    chain = ActionChains(driver)
    chain.send_keys(Keys.LEFT)
    for _ in range(times):
        chain.perform()


# If an info panel fails to load, this navigates away from the photo and back
# again, then retries reading the panel.
def error_solver(i, count):
    """Nudge the viewer off the current photo and back, then retry begin_find.

    Args:
        i: Loop index of the current photo; passed through to begin_find.
        count: Number of attempts made so far for this photo.

    When count is below 1 the quick sequence runs (one step LEFT, then
    go_next). Otherwise the slower "stubborn" sequence runs (go_next twice,
    then the LEFT action performed twice). In both cases begin_find is called
    again with count incremented by one.
    """
    if count < 1:
        print(count)
        print('\x1b[6;30;42m' + "Solving Error..." + '\x1b[0m')
        time.sleep(.1)
        _press_left(1)
        time.sleep(.1)
        go_next()
    else:
        print('\x1b[6;30;42m' + "Solving Stubborn Error..." + '\x1b[0m')
        time.sleep(.75)
        go_next()
        time.sleep(.5)
        go_next()
        time.sleep(.5)
        # Performing the same queued action twice mirrors the two go_next() calls above.
        _press_left(2)
        time.sleep(.5)
    begin_find(count + 1, i)
def begin_find(count, i):
    """Read the info panel of the active photo and report whether the comments are already saved.

    Sends the "i" key to the page (presumably the shortcut that opens the info
    panel -- confirm), waits up to 30 seconds for the panel container to become
    visible, and splits its innerHTML into three chunks. The first chunk that
    is not marked ``style="display: none;"`` is handed to spreadsheet(), which
    stores the photo's description in the global ``description``.

    Args:
        count: Number of attempts already made for this photo.
        i: Loop index of the current photo; passed through to helpers.

    Returns:
        True when the global ``comments`` string is contained in the global
        ``description`` string, False otherwise.

    If all three chunks are hidden, the attempt counter is incremented; on the
    third attempt the browser window is closed, otherwise error_solver() is
    asked to recover (which calls this function again).
    """
    hidden_marker = "style=\"display: none;\""
    chunk_end = "</c-data></c-wiz>"
    info = '//*[@id="ow45"]/div[2]'

    # Finding Info Group with XPATH
    time.sleep(.2)
    # find_element(By...) works on both Selenium 3 and 4; the find_element_by_*
    # helpers were removed from Selenium 4.
    driver.find_element(By.TAG_NAME, 'body').send_keys('i')
    time.sleep(0.5)
    WebDriverWait(driver, 30).until(EC.visibility_of_all_elements_located((By.XPATH, info)))

    for items in driver.find_elements(By.XPATH, info):
        # Splitting the panel HTML into the 3 pieces
        text = items.get_attribute("innerHTML")
        start = text.find("c-wiz")
        end = text.find(chunk_end) + len(chunk_end)
        substring1 = text[start:end]
        remainder = text[end:]
        twoend = remainder.find(chunk_end) + len(chunk_end)
        substring2 = remainder[:twoend]
        substring3 = remainder[twoend:]

        # The active photo can be in any of the three chunks; take the first visible one.
        for number, chunk in enumerate((substring1, substring2, substring3), start=1):
            if hidden_marker not in chunk:
                print('\x1b[6;30;42m' + "found in " + str(number) + '\x1b[0m')
                spreadsheet(chunk, i, count)
                break
        else:
            # Every chunk is hidden. (The original condition here,
            # `marker in substring1 and substring2 and substring3`, only tested
            # truthiness of the last two and was always true when reached.)
            print('\33[101m' + "Error: No Info found" + '\x1b[0m')
            time.sleep(.5)
            count += 1
            if count == 3:
                driver.close()
            else:
                error_solver(i, count)

    return comments in description
def spreadsheet(a_substring, i, count):
    """Extract the photo description from an info-panel HTML chunk.

    The result is stored in the module-level global ``description``:
    the description text wrapped in double quotes, or an empty string when
    the chunk has no description element or only shows the
    "Add a description" placeholder.

    Args:
        a_substring: HTML chunk of the visible info panel.
        i: Unused; kept so existing callers keep working.
        count: Unused; kept so existing callers keep working.

    Returns:
        None. The outcome is communicated through the ``description`` global.
    """
    global description
    marker = "class=\"n4jc2d\" aria-hidden=\"true\">"

    _, found, after_marker = a_substring.partition(marker)
    if not found:
        description = ""
        return

    # partition keeps the whole remainder when "</div>" is missing, whereas
    # find() + slicing would return -1 and silently drop the last character.
    body = after_marker.partition("</div>")[0]
    description = "\"" + body + "\""
    if "Add a description" in description:
        description = ""
    print("Description: " + description)
# First function that runs for each photo. It opens the comment panel and
# parses the comments of the active (viewing) photo.
def get_comment(count, i):
    """Open the comment panel and load its comments into the global ``comments``.

    Clicks the element at the comment-button XPath, waits for the panel, then
    passes the innerHTML of every matching comment container to
    format_comments(), which stores the formatted text in ``comments``.

    Args:
        count: Unused on entry; parsing always starts from 0.
        i: Unused; kept so existing callers keep working.
    """
    button_xpath = "//*[@id=\"yDmH0d\"]/c-wiz/div[4]/c-wiz/div[2]/div[2]/div/div/div[2]/div[2]"
    panel_xpath = "//*[@id=\"yDmH0d\"]/c-wiz/div[4]/c-wiz/div[3]/c-wiz/div/div[2]/div/div/div[1]"

    time.sleep(1)
    # find_element(By...) works on both Selenium 3 and 4; the find_element_by_*
    # helpers were removed from Selenium 4.
    driver.find_element(By.XPATH, button_xpath).click()
    time.sleep(2)

    panels = driver.find_elements(By.XPATH, panel_xpath)
    if not panels:
        # No comment container on this photo: reset the global so the previous
        # photo's comments are not reused (or left undefined on the first photo).
        format_comments("", "", 0)
        return
    for panel in panels:
        format_comments("", panel.get_attribute("innerHTML"), 0)
def _split_at(text, token):
    """Return (before, rest) with rest starting at the first token; rest is "" if token is absent."""
    index = text.find(token)
    if index == -1:
        return text, ""
    return text[:index], text[index:]


def _after(text, token):
    """Return everything following the first token, or "" when token is absent."""
    return text.partition(token)[2]


def format_comments(comment, text, count, year=2020):
    """Turn the comment panel HTML into "user on date, year: comment" lines.

    Each comment is located by its user-name marker; the user, date and
    comment text are read from the ``<span>`` elements that follow. The lines
    are joined with newlines and stored in the module-level global
    ``comments``.

    Args:
        comment: Text accumulated so far; ignored for the first comment when
            count is 0, otherwise new lines are appended to it.
        text: innerHTML of the comment panel.
        count: Number of comments already formatted (0 starts a fresh string).
        year: Year appended after each scraped date. Defaults to 2020, the
            value that was previously hard-coded.

    Returns:
        A ``(text, comments)`` tuple: the unmodified input HTML and the
        formatted comment string (empty when no comment marker is present).
    """
    global comments
    user_marker = "class=\"ZLAKfe\"><span>"

    # Iterating (instead of recursing once per comment) avoids hitting the
    # recursion limit on long threads; `remaining` shrinks on every pass.
    remaining = text
    while user_marker in remaining:
        after_user = _after(remaining, user_marker)
        user_final, for_date = _split_at(after_user, "</span>")
        temp_date = _after(for_date, "<span>")
        date_final, for_comment = _split_at(temp_date, "</span>")
        comment_block = _after(for_comment, "<div class=\"o4t7kc\"")
        comment_final = _split_at(_after(comment_block, "<span>"), "</span>")[0]

        line = user_final + " on " + date_final + ", " + str(year) + ": " + comment_final
        if count == 0:
            comment = line
        else:
            comment = comment + "\n" + line
        count += 1
        print(comment)
        # Continue scanning right after the date that was just consumed.
        remaining = temp_date

    comments = str(comment)
    return (text, comments)
def add_description(count):
    """Append the global ``comments`` text to the description field of the active photo.

    Args:
        count: Unused; kept so existing callers keep working.

    The textarea XPath depends on the global ``photo_num``: the first photo
    uses one path and every later photo uses another. Nothing is typed when
    ``photo_num`` is below 1.
    """
    print("starting")
    des_path = "//*[@id=\"ow45\"]/div[2]/c-wiz[1]/div/div[2]/div/div/div/textarea"
    des_path_2 = "//*[@id=\"ow45\"]/div[2]/c-wiz[2]/div/div[2]/div/div/div/textarea"
    time.sleep(1)

    # The description field is different for the first photo vs. the rest.
    if photo_num == 1:
        target = des_path
    elif photo_num > 1:
        target = des_path_2
    else:
        return

    def field():
        # Re-locate on every use, as the original did, so a re-rendered
        # textarea does not leave us holding a stale element.
        # find_element(By...) works on both Selenium 3 and 4.
        return driver.find_element(By.XPATH, target)

    field().click()
    # Since there may already be content in the description, it is necessary
    # to key down to the bottom of the field before typing (49 DOWN presses).
    field().send_keys(Keys.DOWN * 49)
    # TAB moves focus out of the field after typing.
    field().send_keys("\n" + comments + Keys.TAB)
# Moves the viewer on to the next photo.
def go_next():
    """Wait half a second, then send the RIGHT arrow key to the browser."""
    time.sleep(0.5)
    ActionChains(driver).send_keys(Keys.RIGHT).perform()
# Opens Chrome and runs through all of the photos specified according to
# NUM_PHOTOS, closing Chrome and the program at the end. When logged_in is
# False it only opens the login page and waits so you can sign in manually.
try:
    if logged_in:
        driver.get(photos_start)
        time.sleep(1)
        for i in range(1, NUM_PHOTOS + 1):
            photo_num += 1
            count = 0
            get_comment(count, i)
            comment_in_description = begin_find(count, i)
            if comment_in_description:
                print("comment already there")
            else:
                print("here")
                add_description(count)
            go_next()
        # Only report success when the photos were actually processed.
        print('\x1b[6;30;42m' + "Finished commenting " + str(NUM_PHOTOS) + " photos!" + '\x1b[0m')
    else:
        driver.get("https://photos.google.com/login")
        # Long pause that keeps the browser open for the manual login; stop with Ctrl+C.
        time.sleep(10000)
finally:
    # Always shut the browser and chromedriver down, even after an error or Ctrl+C.
    driver.quit()