# The purpose of this program is to calculate TF scores for all Tax Court opinions in a given JSONL file.
# It writes per-year TF results, per-judge TF results, granular per-term counts by year, and (optionally)
# case-level information to the CSV files configured in the PARAMETERS section below.
from Opinion import republican_judge_surnames, democrat_judge_surnames, all_judge_surnames, unknown_judge_surnames, \
    pre_process_TC, correct_judge_name, default_true, irs_wins, taxpayer_wins, did_taxpayer_win, is_republican, \
    is_democrat
from Text_Scoring import leg_hist_count, investigate_term_use
from Text_Scoring import textualist_count, textualist_count_ex_dictionaries
from Text_Scoring import document_word_count
from Text_Scoring import normative_count
from Text_Scoring import leg_hist_list
from Text_Scoring import textualist_list, textualist_list_ex_dictionaries
from Text_Scoring import normative_list
from Text_Scoring import interpretive_count
from Text_Scoring import interpretive_list
from Text_Scoring import normative_yes, interpretive_yes, leg_hist_yes, textualist_yes
from Text_Scoring import common_term_count
from Text_Scoring import Tf_counter
from Text_Scoring import tax_canon_list_of_lists
import json_lines
from random import randrange

# PARAMETERS
bta_opinion_file_loc = "C:/Users/thech/Documents/Data/Tax Court Data - case.law/Board of Tax Appeals Opinions.jsonl"
tax_ct_opinion_file_loc = "C:/Users/thech/Documents/Data/Tax Court Data - case.law/Tax Court Opinions.jsonl"
param_tracking_file_no_txt = "C:/Users/thech/Documents/Data/Results/result_tracking"
param_output_file = "C:/Users/thech/Documents/Data/Results/results"
param_judge_file = "C:/Users/thech/Documents/Data/Results/judge_results.csv"
param_granular_results_file = "C:/Users/thech/Documents/Data/Results/TF results by year.csv"
interim_case_info_file = "C:/Users/thech/Documents/Data/Results/interim_case_info_file.csv"
case_info_file_loc = "C:/Users/thech/Documents/Data/Results/results-case-info.csv"
d_ct_1_opinion_file_loc = "C:/Users/thech/Documents/Data/F Supp.jsonl"
d_ct_2_opinion_file_loc = "C:/Users/thech/Documents/Data/Fed Supp 2d (1998 - 2014).jsonl"
d_ct_3_opinion_file_loc = "C:/Users/thech/Documents/Data/F Supp 3 (2014 - present).jsonl"
cfc_opinion_file_loc = "C:/Users/thech/Documents/Data/Federal Claims Reporter 1993-2014.jsonl"
bankruptcy_opinion_file_loc = "C:/Users/thech/Documents/Data/West's Bankruptcy Reporter 1980-2014.jsonl"
s_ct_file_loc = "C:/Users/thech/Documents/Data/US Reports.jsonl"


def save_weighted_tf_per_year(jsonl_file_loc, include_only_if_true_function=default_true, count_method=leg_hist_count,
                              file_suffix="", output_case_info=False):
    """Compute weighted term frequencies per year (and per judge) for every opinion in a JSONL file.

    count_method scores a single opinion's text; include_only_if_true_function filters opinions by text and
    judge name; file_suffix distinguishes the output files; output_case_info additionally writes a case-level
    CSV with judge, party-affiliation, and outcome variables.
    """
    term_word_count_dict = {}
    judge_word_count_dict = {}
    judge_earliest_opinion_dict = {}
    case_tf_dict = {}
    term_count_by_year_dict = {}
    # Buckets of sample opinions, keyed by (start_year, end_year), used for manual spot checks at the end
    spot_check_dict = {(1942, 1948): [], (1949, 1955): [], (1956, 1962): [], (1963, 1969): [],
                       (1970, 1976): [], (1977, 1983): [], (1984, 1991): [], (1992, 1999): [],
                       (2000, 2007): [], (2008, 2015): [],
                       }
    taxpayer_win_count = 0
    irs_win_count = 0
    no_winner_count = 0
    if count_method == leg_hist_count:
        term_list = leg_hist_list
    elif count_method == textualist_count:
        term_list = textualist_list
    elif count_method == textualist_count_ex_dictionaries:
        term_list = textualist_list_ex_dictionaries
    elif count_method == normative_count:
        term_list = normative_list
    elif count_method == interpretive_count:
        term_list = interpretive_list
    else:
        term_list = None
    param_tracking_file = param_tracking_file_no_txt + file_suffix + ".txt"
    # Only written to by the commented-out tracking lines inside the main loop
    tracking_output_file = open(param_tracking_file, "w", encoding="utf-8")
    open_file = open(jsonl_file_loc, "rb")
    tracking_op_num = 0
    if output_case_info:
        case_info_file = open(interim_case_info_file, "w")
case_info_file.write("ID, TF, Judge, is_democrat, taxpayer_wins, Year of Opinion\n") opinion_count = 0 word_count = 0 # Steps through, each line in the JSONL file for opinion in json_lines.reader(open_file): opinion_count = opinion_count + 1 date_string = opinion["decision_date"] year_string = date_string[0:4] year = int(year_string) # Round up to the nearest even number # year = year + (year % 2) opinion_id = int(opinion["id"]) # Sometimes the opinion is empty, if so skip. if opinion["casebody"]["data"]["opinions"]: op_plain_text = pre_process_TC(opinion["casebody"]["data"]["opinions"][0]["text"]) # print(str(leg_hist_count(op_plain_text))) else: continue op_hit_count = count_method(op_plain_text) #print(str(op_hit_count)) if count_method != textualist_yes and count_method != leg_hist_yes and count_method != normative_yes and \ count_method != interpretive_yes: op_word_count = document_word_count(op_plain_text) word_count = word_count + op_word_count multiplier = 1000000 else: op_word_count = 1 word_count = word_count + 1 multiplier = 1 taxpayer_won = did_taxpayer_win(op_plain_text) if taxpayer_won == True: taxpayer_win_count = taxpayer_win_count + 1 elif taxpayer_won == False: irs_win_count = irs_win_count + 1 elif taxpayer_won == None: no_winner_count = no_winner_count + 1 if op_hit_count > 1 and output_case_info: opposite_count = None if count_method == leg_hist_count: opposite_count = textualist_count elif count_method == textualist_count: opposite_count = leg_hist_count elif count_method == textualist_count_ex_dictionaries: opposite_count = leg_hist_count elif count_method == normative_count: opposite_count = interpretive_count elif count_method == interpretive_count: opposite_count = normative_count opposite_hit_count = opposite_count(op_plain_text) opinion_tuple = (opinion["name_abbreviation"], opinion["citations"][0]["cite"], year, op_plain_text, op_hit_count, opposite_hit_count) #tracking_output_file.write("[[OP NO " + str(tracking_op_num) + ", year = " + str(year) + "\n") #tracking_output_file.write(op_plain_text) #tracking_op_num = tracking_op_num + 1 for (start_year, end_year) in spot_check_dict: if year >= start_year and year <= end_year: spot_check_dict[(start_year, end_year)].append(opinion_tuple) if not opinion["casebody"]["data"]: print("No data: " + opinion) continue if opinion["casebody"]["data"]["opinions"][0]["author"]: judge = correct_judge_name(opinion["casebody"]["data"]["opinions"][0]["author"]) if judge in judge_word_count_dict: [judge_tf_count, judge_word_count] = judge_word_count_dict[judge] judge_word_count_dict[judge] = [op_hit_count + judge_tf_count, judge_word_count + op_word_count] else: judge_word_count_dict[judge] = [op_hit_count, op_word_count] if judge in judge_earliest_opinion_dict: if year < judge_earliest_opinion_dict[judge]: judge_earliest_opinion_dict[judge] = year else: judge_earliest_opinion_dict[judge] = year else: print("No judge: " + str(opinion_id)) judge="n/a" if (not include_only_if_true_function(opinion_text=op_plain_text, name=judge)): continue if output_case_info: dem_value = "-1" if is_democrat(name=judge): dem_value = "1" elif is_republican(name=judge): dem_value = "0" taxpayer_win_value = "-1" if taxpayer_won == True: taxpayer_win_value = "1" elif taxpayer_won == False: taxpayer_win_value = "0" if judge != "n/a": case_info_file.write(str(opinion_id) + ", " + str(op_hit_count / op_word_count) + ", " + judge + ", " + dem_value + ", " + taxpayer_win_value + ", " + year_string + "\n") if year in term_word_count_dict: [current_hist_count, 
current_total_count] = term_word_count_dict[year] # print(str(current_hist_count) + ", " + str(current_total_count)) term_word_count_dict[year] = [current_hist_count + op_hit_count, current_total_count + op_word_count] else: term_word_count_dict[year] = [op_hit_count, op_word_count] if term_list: if year in term_count_by_year_dict: one_year_term_count_dict = term_count_by_year_dict[year] for term in term_list: one_year_term_count_dict[term] = one_year_term_count_dict[term] + op_plain_text.count(term) else: new_dict = {} for term in term_list: new_dict[term] = op_plain_text.count(term) term_count_by_year_dict[year] = new_dict print("Opinion count: " + str(opinion_count)) print("Word count: " + str(word_count)) print("Taxpayer win count: " + str(taxpayer_win_count)) print("IRS win count: " + str(irs_win_count)) print("No winner count: " + str(no_winner_count)) if output_case_info: case_info_file.close() output_file = open(param_output_file + file_suffix + ".csv", "w") output_file.write("Year, Term Frequency\n") for year in sorted(term_word_count_dict): [hist_count, total_count] = term_word_count_dict[year] output_file.write(str(year) + ", " + str(multiplier * hist_count / total_count) + "\n") judge_output_file = open(param_judge_file, "w", encoding="utf-8") judge_output_file.write("Judge Surname, TF, Earliest Opinion Year, Is Dem\n") for judge in sorted(judge_word_count_dict): [tf_count, word_count] = judge_word_count_dict[judge] if judge not in all_judge_surnames and judge not in unknown_judge_surnames: print("\"" + judge + "\", ") # Generate dummy variable for party affiliation if judge in democrat_judge_surnames: is_dem = 1 elif judge in republican_judge_surnames: is_dem = 0 else: is_dem = -1 judge_output_file.write(judge + ", " + str(multiplier * tf_count / word_count) + ", " + str(judge_earliest_opinion_dict[judge]) + ", " + str(is_dem) + "\n") if output_case_info: old_case_info_file = open(interim_case_info_file, "r") case_info_file = open(case_info_file_loc, "w") case_info_file.write("ID, TF, Judge, Is Democrat, Taxpayer Wins, Year of Opinion, Year Appointed") for judge in judge_word_count_dict: case_info_file.write(", " + judge) case_info_file.write("\n") for line in old_case_info_file: judge = line.split(", ")[2] try: first_year = judge_earliest_opinion_dict[judge] case_info_file.write(line[:-1] + ", " + str(first_year)) for judge_dummy in judge_word_count_dict: if judge == judge_dummy: case_info_file.write(", 1") else: case_info_file.write(", 0") case_info_file.write("\n") except: print("Key error for judge: " + judge) case_info_file.close() granular_output_file = open(param_granular_results_file, "w", encoding="utf-8") granular_output_file.write("Year, Term, Count\n") if term_count_by_year_dict: for year in sorted(term_count_by_year_dict): one_year_dict = term_count_by_year_dict[year] for term in sorted(one_year_dict): granular_output_file.write(str(year) + ", " + term + ", " + str(one_year_dict[term]) + "\n") relevant_list = None if count_method == leg_hist_count: relevant_list = leg_hist_list elif count_method == textualist_count: relevant_list = textualist_list elif count_method == textualist_count_ex_dictionaries: relevant_list = textualist_list_ex_dictionaries elif count_method == normative_count: relevant_list = normative_list elif count_method == interpretive_count: relevant_list = interpretive_list opposite_list = None if count_method == leg_hist_count: opposite_list = textualist_list elif count_method == textualist_count: opposite_list = leg_hist_list elif count_method == 
textualist_count_ex_dictionaries: opposite_list = leg_hist_list elif count_method == normative_count: opposite_list = interpretive_list elif count_method == interpretive_count: opposite_list = normative_list for key in spot_check_dict: opinion_list = spot_check_dict[key] op_list_len = len(opinion_list) if op_list_len > 0: (name, citation, year, op_plain_text, score, opposite_score) = opinion_list[randrange(op_list_len)] print(name + ", " + citation + " (" + str(year) + ") : " + str(score) + ", opposite = " + str(opposite_score)) investigate_term_use(op_plain_text, relevant_list) investigate_term_use(op_plain_text, opposite_list) else: print("No hits, key is " + str(key)) #save_weighted_tf_per_year(d_ct_1_opinion_file_loc, default_true, textualist_count, file_suffix="d-ct-1-text") #save_weighted_tf_per_year(d_ct_2_opinion_file_loc, default_true, textualist_count, file_suffix="d-ct-2-text") #save_weighted_tf_per_year(d_ct_3_opinion_file_loc, default_true, textualist_count, file_suffix="d-ct-3-text") #save_weighted_tf_per_year(d_ct_1_opinion_file_loc, default_true, leg_hist_count, file_suffix="d-ct-1-leg") #save_weighted_tf_per_year(d_ct_2_opinion_file_loc, default_true, leg_hist_count, file_suffix="d-ct-2-leg") #save_weighted_tf_per_year(d_ct_3_opinion_file_loc, default_true, leg_hist_count, file_suffix="d-ct-3-leg") save_weighted_tf_per_year(tax_ct_opinion_file_loc, default_true, textualist_count_ex_dictionaries, output_case_info=True) #save_weighted_tf_per_year(tax_ct_opinion_file_loc, default_true, leg_hist_yes, file_suffix="tax-ct-purp") #save_weighted_tf_per_year(tax_ct_opinion_file_loc, default_true, normative_yes, file_suffix="tax-ct-norm") #save_weighted_tf_per_year(tax_ct_opinion_file_loc, default_true, interpretive_yes, file_suffix="tax-ct-interp") #save_weighted_tf_per_year(tax_ct_opinion_file_loc, default_true, textualist_yes, file_suffix="tax-ct-text") #for list in tax_canon_list_of_lists: # # name = list[0] # tf_counter = Tf_counter(hit_list=list) # save_weighted_tf_per_year(tax_ct_opinion_file_loc, default_true, tf_counter.tf_list_count, file_suffix=name)
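
# Illustrative, hedged sketch (not part of the original runs above): include_only_if_true_function can be
# any callable that accepts opinion_text= and name= keyword arguments and returns a boolean, matching how
# it is invoked inside save_weighted_tf_per_year. The helper below shows one possible filter that restricts
# the scores to opinions authored by Democratic-appointed judges; the name "democrats_only" and the
# "tax-ct-text-ex-dict-dem" file suffix are hypothetical and not defined elsewhere in this project.
def democrats_only(opinion_text, name):
    # Relies only on the judge's surname; the opinion text is ignored.
    return is_democrat(name=name)

#save_weighted_tf_per_year(tax_ct_opinion_file_loc, democrats_only, textualist_count_ex_dictionaries,
#                          file_suffix="tax-ct-text-ex-dict-dem")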