Videos CSV Writer

Social Media
Purpose:

This program takes raw Facebook and Instagram data (from "Download my Information" on both platforms) and writes the videos' date, description, and uri to a spreadsheet for easier manipulation.

Dependencies:

Python (with json, os, and datetime modules), ftfy

# REQUIRES os module and fix_text FROM ftfy (pip install ftfy)
# NOTE since it is a CSV, it must replace all "," instances in the description field with "`"'s.
# Whenever the data is used again, change "`"'s in the description field back to commas.

import json
import os
import re
from datetime import datetime

from ftfy import fix_text

video_directory = "#DIRECTORY OF VIDEOS JSON FILE - ex. videos.json#"
base = "#DIRECTORY OF THE BASE FACEBOOK FOLDER ON MACHINE#"
video_descriptions_csv = "video_descriptions.csv"

# Loads the videos JSON export into `data`.
# Facebook/Instagram exports are UTF-8; be explicit instead of relying on
# the platform default encoding.
# NOTE(review): no context manager here because `j` and `f` are used by the
# rest of the script; both stay open until interpreter exit.
j = open(video_directory, encoding="utf-8")
data = json.load(j)

# Opens (or creates) the output CSV in the current working directory.
# "a" appends to an existing table; change it to "w" to overwrite instead.
f = open(video_descriptions_csv, "a", encoding="utf-8")

# Running total of videos written to the CSV.
count = 0

# Corrects for encoded emojis and @'s in a post's description.
def fix_all(description):
    """Run ftfy's mojibake repair, then decode any "@[...]" mentions."""
    cleaned = fix_text(description)
    return fix_at(cleaned) if "@[" in cleaned else cleaned

# Fixes the encoded @'s that sometimes show up in post captions.
# Facebook encodes mentions as "@[<id>:<type>:<Name>]"; this rewrites each
# one to a plain "@Name".
def fix_at(description):
    """Replace every encoded mention "@[...:Name]" in *description* with "@Name".

    Only the text after the last ":" inside the brackets is kept, matching
    the export's "<id>:<type>:<Name>" layout.

    BUGFIX: the previous hand-rolled while-loop spun forever when a mention
    had no closing "]" (`find` returned -1, so "@[" was never consumed).
    A malformed mention is now simply left untouched.
    """
    def _decode(match):
        # Keep only the portion after the last ":" inside the brackets.
        return "@" + match.group(1).rsplit(":", 1)[-1]

    return re.sub(r"@\[([^\]]*)\]", _decode, description)

# Loops through all of the JSON data and writes the videos' information to a CSV table.
for i in data['videos']:
    full_path = base + str(i['uri'])

    # Finds and refines the video's description.
    if i["description"] != "":
        fixed_description = fix_all(str(i["description"]))
    else:
        fixed_description = ""

    # The timestamp is already saved correctly to the videos, but if you need it:
    # if i['creation_timestamp'] != "":
    #     ts = int(i['creation_timestamp'])
    #     ts = datetime.utcfromtimestamp(ts).strftime('%Y:%m:%d %H:%M:%S')

    # Only writes rows for videos that actually exist on disk.
    if os.path.isfile(full_path):
        fname = str(i['uri']).rsplit("/")[-1]
        # Some of the videos are saved as mp4's without the file extension,
        # so this adds it back so the file can be played.
        if ".mp4" not in fname:
            fname += ".mp4"
        # CSV-safe description: commas become "`" (see the note at the top of
        # the file) and literal newlines become the two characters "\n".
        no_commas = fixed_description.replace(",", "`")
        f.write(fname + "," + no_commas.replace("\n", "\\n") + "\n")
        # BUGFIX: count was declared and reported but never incremented, so
        # the final tally was always wrong.
        count += 1

# Prints the program's video count.
# BUGFIX: "Found " + count raised TypeError (cannot concatenate str and int);
# the counter must be stringified first.
print("Found " + str(count) + " videos!")