# This code takes a folder containing text files, where each text file
# contains a newline-separated list of URLs. It downloads the contents of
# each URL into the output folder, creating one subfolder per text file.
# NB: output_folder itself must already exist; only the per-file subfolders
# are created by this script.

import os
import urllib.request

# Key initial parameters: the folder of URL lists and the download destination.
input_folder = "C:/Users/thech/Code/Federal Register URLs"
output_folder = "C:/Users/thech/Code/Fed Reg Plain Text Files"

# Builds file_array, the list of file names in the top level of input_folder.
# (os.walk is recursive; taking only its first result keeps this to the top
# level instead of overwriting file_array once per subdirectory.)
file_array = next(os.walk(input_folder))[2]

# Opens a file to record any URLs that were inaccessible.
error_file = open(input_folder + "/errors.txt", "w")

# Steps through, opening each file in input_folder.
for one_file in file_array:
    # Skips the error log itself if a previous run left one behind.
    if one_file == "errors.txt":
        continue

    # Creates an array of URLs (url_list) from one_file, stripping trailing
    # newlines (which would otherwise corrupt the request) and blank lines.
    with open(input_folder + "/" + one_file) as my_file:
        url_list = [line.strip() for line in my_file if line.strip()]

    # Creates a new folder (one_file_folder) named after one_file.
    one_file_folder = output_folder + "/" + one_file.replace(".txt", "")
    try:
        os.mkdir(one_file_folder)
    except FileExistsError:
        print("Folder already exists: " + one_file_folder)

    # Steps through url_list and downloads each URL into one_file_folder.
    for one_url in url_list:
        print("Loading: " + one_url)
        # Names the output file after the last path segment of the URL,
        # normalized to a single .txt extension.
        one_output_file = (one_file_folder + "/"
                           + one_url.split("/").pop().split(".txt")[0] + ".txt")
        print("Output file: " + one_output_file)
        try:
            urllib.request.urlretrieve(one_url, one_output_file)
        except Exception:
            print("Unable to download the following URL: " + one_url)
            error_file.write("Unable to load: " + one_url + "\n")

error_file.close()
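
# Usage sketch (an assumption about the input data, not verified against the
# original files): each file in input_folder is expected to hold one Federal
# Register plain-text URL per line, for example (hypothetical URL):
#
#   https://www.federalregister.gov/documents/full_text/text/2019/12/31/2019-28321.txt
#
# With that layout, an input file named "2019-12.txt" produces the folder
# "<output_folder>/2019-12" containing one downloaded .txt file per URL, and
# any URLs that fail to download are listed in "<input_folder>/errors.txt".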