Videos CSV Writer

Social Media
Purpose:

This program takes raw Facebook and Instagram data (from "Download my Information" on both platforms) and writes the videos' date, description, and uri to a spreadsheet for easier manipulation.

Dependencies:

Python (with json, os, and datetime modules), ftfy

# REQUIRES os module and fix_text FROM ftfy (pip install ftfy)
# NOTE since it is a CSV, it must replace all "," instances in the description field with "`"'s.
# Whenever the data is used again, change "`"'s in the description field back to commas.

import json
import os
import re
from datetime import datetime

from ftfy import fix_text

video_directory = "#DIRECTORY OF VIDEOS JSON FILE - ex. videos.json#"
base = "#DIRECTORY OF THE BASE FACEBOOK FOLDER ON MACHINE#"
video_descriptions_csv = "video_descriptions.csv"

# Loads the videos JSON export into `data`.
# Facebook/Instagram exports are UTF-8; be explicit instead of relying on
# the platform default encoding.
# NOTE(review): no context manager here because `j` and `f` are used by the
# rest of the script; both stay open until interpreter exit.
j = open(video_directory, encoding="utf-8")
data = json.load(j)

# Opens (or creates) the output CSV in the current working directory.
# "a" appends to an existing table; change it to "w" to overwrite instead.
f = open(video_descriptions_csv, "a", encoding="utf-8")

# Running total of videos written to the CSV.
count = 0

# Corrects for encoded emojis and @'s in a post's description.
def fix_all(description):
    """Run ftfy's mojibake repair, then decode any "@[...]" mentions."""
    cleaned = fix_text(description)
    return fix_at(cleaned) if "@[" in cleaned else cleaned

# Fixes the encoded @'s that sometimes show up in post captions.
# Facebook encodes mentions as "@[<id>:<type>:<Name>]"; this rewrites each
# one to a plain "@Name".
def fix_at(description):
    """Replace every encoded mention "@[...:Name]" in *description* with "@Name".

    Only the text after the last ":" inside the brackets is kept, matching
    the export's "<id>:<type>:<Name>" layout.

    BUGFIX: the previous hand-rolled while-loop spun forever when a mention
    had no closing "]" (`find` returned -1, so "@[" was never consumed).
    A malformed mention is now simply left untouched.
    """
    def _decode(match):
        # Keep only the portion after the last ":" inside the brackets.
        return "@" + match.group(1).rsplit(":", 1)[-1]

    return re.sub(r"@\[([^\]]*)\]", _decode, description)

# Loops through all of the JSON data and writes the videos' information to a CSV table.
for i in data['videos']:
    full_path = base + str(i['uri'])

    # Finds and refines the video's description.
    if i["description"] != "":
        fixed_description = fix_all(str(i["description"]))
    else:
        fixed_description = ""

    # The timestamp is already saved correctly to the videos, but if you need it:
    # if i['creation_timestamp'] != "":
    #     ts = int(i['creation_timestamp'])
    #     ts = datetime.utcfromtimestamp(ts).strftime('%Y:%m:%d %H:%M:%S')

    # Only writes rows for videos that actually exist on disk.
    if os.path.isfile(full_path):
        fname = str(i['uri']).rsplit("/")[-1]
        # Some of the videos are saved as mp4's without the file extension,
        # so this adds it back so the file can be played.
        if ".mp4" not in fname:
            fname += ".mp4"
        # CSV-safe description: commas become "`" (see the note at the top of
        # the file) and literal newlines become the two characters "\n".
        no_commas = fixed_description.replace(",", "`")
        f.write(fname + "," + no_commas.replace("\n", "\\n") + "\n")
        # BUGFIX: count was declared and reported but never incremented, so
        # the final tally was always wrong.
        count += 1

# Prints the program's video count.
# BUGFIX: "Found " + count raised TypeError (cannot concatenate str and int);
# the counter must be stringified first.
print("Found " + str(count) + " videos!")