# The purpose of this program is to calculate TF scores for all Tax Court opinions in a given JSONL file.
# It writes per-year TF results, per-judge TF results, granular per-term counts by year, and (optionally)
# case-level information to the CSV files configured in the PARAMETERS section below.
from Opinion import republican_judge_surnames, democrat_judge_surnames, all_judge_surnames, unknown_judge_surnames, \
    pre_process_TC, correct_judge_name, default_true, irs_wins, taxpayer_wins, did_taxpayer_win, is_republican, \
    is_democrat
from Text_Scoring import leg_hist_count, investigate_term_use
from Text_Scoring import textualist_count, textualist_count_ex_dictionaries
from Text_Scoring import document_word_count
from Text_Scoring import normative_count
from Text_Scoring import leg_hist_list
from Text_Scoring import textualist_list, textualist_list_ex_dictionaries
from Text_Scoring import normative_list
from Text_Scoring import interpretive_count
from Text_Scoring import interpretive_list
from Text_Scoring import normative_yes, interpretive_yes, leg_hist_yes, textualist_yes
from Text_Scoring import common_term_count
from Text_Scoring import Tf_counter
from Text_Scoring import tax_canon_list_of_lists
import json_lines
from random import randrange

# PARAMETERS
bta_opinion_file_loc = "C:/Users/thech/Documents/Data/Tax Court Data - case.law/Board of Tax Appeals Opinions.jsonl"
tax_ct_opinion_file_loc = "C:/Users/thech/Documents/Data/Tax Court Data - case.law/Tax Court Opinions.jsonl"
param_tracking_file_no_txt = "C:/Users/thech/Documents/Data/Results/result_tracking"
param_output_file = "C:/Users/thech/Documents/Data/Results/results"
param_judge_file = "C:/Users/thech/Documents/Data/Results/judge_results.csv"
param_granular_results_file = "C:/Users/thech/Documents/Data/Results/TF results by year.csv"
interim_case_info_file = "C:/Users/thech/Documents/Data/Results/interim_case_info_file.csv"
case_info_file_loc = "C:/Users/thech/Documents/Data/Results/results-case-info.csv"
d_ct_1_opinion_file_loc = "C:/Users/thech/Documents/Data/F Supp.jsonl"
d_ct_2_opinion_file_loc = "C:/Users/thech/Documents/Data/Fed Supp 2d (1998 - 2014).jsonl"
d_ct_3_opinion_file_loc = "C:/Users/thech/Documents/Data/F Supp 3 (2014 - present).jsonl"
cfc_opinion_file_loc = "C:/Users/thech/Documents/Data/Federal Claims Reporter 1993-2014.jsonl"
bankruptcy_opinion_file_loc = "C:/Users/thech/Documents/Data/West's Bankruptcy Reporter 1980-2014.jsonl"
s_ct_file_loc = "C:/Users/thech/Documents/Data/US Reports.jsonl"


def save_weighted_tf_per_year(jsonl_file_loc, include_only_if_true_function=default_true, count_method=leg_hist_count,
                              file_suffix="", output_case_info=False):
    """Compute weighted term frequencies per year (and per judge) for every opinion in a JSONL file.

    count_method scores a single opinion's text; include_only_if_true_function filters opinions by text and
    judge name; file_suffix distinguishes the output files; output_case_info additionally writes a case-level
    CSV with judge, party-affiliation, and outcome variables.
    """
    term_word_count_dict = {}
    judge_word_count_dict = {}
    judge_earliest_opinion_dict = {}
    case_tf_dict = {}
    term_count_by_year_dict = {}
    # Buckets of sample opinions, keyed by (start_year, end_year), used for manual spot checks at the end
    spot_check_dict = {(1942, 1948): [], (1949, 1955): [], (1956, 1962): [], (1963, 1969): [],
                       (1970, 1976): [], (1977, 1983): [], (1984, 1991): [], (1992, 1999): [],
                       (2000, 2007): [], (2008, 2015): [],
                       }
    taxpayer_win_count = 0
    irs_win_count = 0
    no_winner_count = 0
    if count_method == leg_hist_count:
        term_list = leg_hist_list
    elif count_method == textualist_count:
        term_list = textualist_list
    elif count_method == textualist_count_ex_dictionaries:
        term_list = textualist_list_ex_dictionaries
    elif count_method == normative_count:
        term_list = normative_list
    elif count_method == interpretive_count:
        term_list = interpretive_list
    else:
        term_list = None
    param_tracking_file = param_tracking_file_no_txt + file_suffix + ".txt"
    # Only written to by the commented-out tracking lines inside the main loop
    tracking_output_file = open(param_tracking_file, "w", encoding="utf-8")
    open_file = open(jsonl_file_loc, "rb")
    tracking_op_num = 0
    if output_case_info:
        case_info_file = open(interim_case_info_file, "w")
case_info_file.write("ID, TF, Judge, is_democrat, taxpayer_wins, Year of Opinion\n") opinion_count = 0 word_count = 0 # Steps through, each line in the JSONL file for opinion in json_lines.reader(open_file): opinion_count = opinion_count + 1 date_string = opinion["decision_date"] year_string = date_string[0:4] year = int(year_string) # Round up to the nearest even number # year = year + (year % 2) opinion_id = int(opinion["id"]) # Sometimes the opinion is empty, if so skip. if opinion["casebody"]["data"]["opinions"]: op_plain_text = pre_process_TC(opinion["casebody"]["data"]["opinions"][0]["text"]) # print(str(leg_hist_count(op_plain_text))) else: continue op_hit_count = count_method(op_plain_text) #print(str(op_hit_count)) if count_method != textualist_yes and count_method != leg_hist_yes and count_method != normative_yes and \ count_method != interpretive_yes: op_word_count = document_word_count(op_plain_text) word_count = word_count + op_word_count multiplier = 1000000 else: op_word_count = 1 word_count = word_count + 1 multiplier = 1 taxpayer_won = did_taxpayer_win(op_plain_text) if taxpayer_won == True: taxpayer_win_count = taxpayer_win_count + 1 elif taxpayer_won == False: irs_win_count = irs_win_count + 1 elif taxpayer_won == None: no_winner_count = no_winner_count + 1 if op_hit_count > 1 and output_case_info: opposite_count = None if count_method == leg_hist_count: opposite_count = textualist_count elif count_method == textualist_count: opposite_count = leg_hist_count elif count_method == textualist_count_ex_dictionaries: opposite_count = leg_hist_count elif count_method == normative_count: opposite_count = interpretive_count elif count_method == interpretive_count: opposite_count = normative_count opposite_hit_count = opposite_count(op_plain_text) opinion_tuple = (opinion["name_abbreviation"], opinion["citations"][0]["cite"], year, op_plain_text, op_hit_count, opposite_hit_count) #tracking_output_file.write("[[OP NO " + str(tracking_op_num) + ", year = " + str(year) + "\n") #tracking_output_file.write(op_plain_text) #tracking_op_num = tracking_op_num + 1 for (start_year, end_year) in spot_check_dict: if year >= start_year and year <= end_year: spot_check_dict[(start_year, end_year)].append(opinion_tuple) if not opinion["casebody"]["data"]: print("No data: " + opinion) continue if opinion["casebody"]["data"]["opinions"][0]["author"]: judge = correct_judge_name(opinion["casebody"]["data"]["opinions"][0]["author"]) if judge in judge_word_count_dict: [judge_tf_count, judge_word_count] = judge_word_count_dict[judge] judge_word_count_dict[judge] = [op_hit_count + judge_tf_count, judge_word_count + op_word_count] else: judge_word_count_dict[judge] = [op_hit_count, op_word_count] if judge in judge_earliest_opinion_dict: if year < judge_earliest_opinion_dict[judge]: judge_earliest_opinion_dict[judge] = year else: judge_earliest_opinion_dict[judge] = year else: print("No judge: " + str(opinion_id)) judge="n/a" if (not include_only_if_true_function(opinion_text=op_plain_text, name=judge)): continue if output_case_info: dem_value = "-1" if is_democrat(name=judge): dem_value = "1" elif is_republican(name=judge): dem_value = "0" taxpayer_win_value = "-1" if taxpayer_won == True: taxpayer_win_value = "1" elif taxpayer_won == False: taxpayer_win_value = "0" if judge != "n/a": case_info_file.write(str(opinion_id) + ", " + str(op_hit_count / op_word_count) + ", " + judge + ", " + dem_value + ", " + taxpayer_win_value + ", " + year_string + "\n") if year in term_word_count_dict: [current_hist_count, 
current_total_count] = term_word_count_dict[year] # print(str(current_hist_count) + ", " + str(current_total_count)) term_word_count_dict[year] = [current_hist_count + op_hit_count, current_total_count + op_word_count] else: term_word_count_dict[year] = [op_hit_count, op_word_count] if term_list: if year in term_count_by_year_dict: one_year_term_count_dict = term_count_by_year_dict[year] for term in term_list: one_year_term_count_dict[term] = one_year_term_count_dict[term] + op_plain_text.count(term) else: new_dict = {} for term in term_list: new_dict[term] = op_plain_text.count(term) term_count_by_year_dict[year] = new_dict print("Opinion count: " + str(opinion_count)) print("Word count: " + str(word_count)) print("Taxpayer win count: " + str(taxpayer_win_count)) print("IRS win count: " + str(irs_win_count)) print("No winner count: " + str(no_winner_count)) if output_case_info: case_info_file.close() output_file = open(param_output_file + file_suffix + ".csv", "w") output_file.write("Year, Term Frequency\n") for year in sorted(term_word_count_dict): [hist_count, total_count] = term_word_count_dict[year] output_file.write(str(year) + ", " + str(multiplier * hist_count / total_count) + "\n") judge_output_file = open(param_judge_file, "w", encoding="utf-8") judge_output_file.write("Judge Surname, TF, Earliest Opinion Year, Is Dem\n") for judge in sorted(judge_word_count_dict): [tf_count, word_count] = judge_word_count_dict[judge] if judge not in all_judge_surnames and judge not in unknown_judge_surnames: print("\"" + judge + "\", ") # Generate dummy variable for party affiliation if judge in democrat_judge_surnames: is_dem = 1 elif judge in republican_judge_surnames: is_dem = 0 else: is_dem = -1 judge_output_file.write(judge + ", " + str(multiplier * tf_count / word_count) + ", " + str(judge_earliest_opinion_dict[judge]) + ", " + str(is_dem) + "\n") if output_case_info: old_case_info_file = open(interim_case_info_file, "r") case_info_file = open(case_info_file_loc, "w") case_info_file.write("ID, TF, Judge, Is Democrat, Taxpayer Wins, Year of Opinion, Year Appointed") for judge in judge_word_count_dict: case_info_file.write(", " + judge) case_info_file.write("\n") for line in old_case_info_file: judge = line.split(", ")[2] try: first_year = judge_earliest_opinion_dict[judge] case_info_file.write(line[:-1] + ", " + str(first_year)) for judge_dummy in judge_word_count_dict: if judge == judge_dummy: case_info_file.write(", 1") else: case_info_file.write(", 0") case_info_file.write("\n") except: print("Key error for judge: " + judge) case_info_file.close() granular_output_file = open(param_granular_results_file, "w", encoding="utf-8") granular_output_file.write("Year, Term, Count\n") if term_count_by_year_dict: for year in sorted(term_count_by_year_dict): one_year_dict = term_count_by_year_dict[year] for term in sorted(one_year_dict): granular_output_file.write(str(year) + ", " + term + ", " + str(one_year_dict[term]) + "\n") relevant_list = None if count_method == leg_hist_count: relevant_list = leg_hist_list elif count_method == textualist_count: relevant_list = textualist_list elif count_method == textualist_count_ex_dictionaries: relevant_list = textualist_list_ex_dictionaries elif count_method == normative_count: relevant_list = normative_list elif count_method == interpretive_count: relevant_list = interpretive_list opposite_list = None if count_method == leg_hist_count: opposite_list = textualist_list elif count_method == textualist_count: opposite_list = leg_hist_list elif count_method == 
textualist_count_ex_dictionaries: opposite_list = leg_hist_list elif count_method == normative_count: opposite_list = interpretive_list elif count_method == interpretive_count: opposite_list = normative_list for key in spot_check_dict: opinion_list = spot_check_dict[key] op_list_len = len(opinion_list) if op_list_len > 0: (name, citation, year, op_plain_text, score, opposite_score) = opinion_list[randrange(op_list_len)] print(name + ", " + citation + " (" + str(year) + ") : " + str(score) + ", opposite = " + str(opposite_score)) investigate_term_use(op_plain_text, relevant_list) investigate_term_use(op_plain_text, opposite_list) else: print("No hits, key is " + str(key)) #save_weighted_tf_per_year(d_ct_1_opinion_file_loc, default_true, textualist_count, file_suffix="d-ct-1-text") #save_weighted_tf_per_year(d_ct_2_opinion_file_loc, default_true, textualist_count, file_suffix="d-ct-2-text") #save_weighted_tf_per_year(d_ct_3_opinion_file_loc, default_true, textualist_count, file_suffix="d-ct-3-text") #save_weighted_tf_per_year(d_ct_1_opinion_file_loc, default_true, leg_hist_count, file_suffix="d-ct-1-leg") #save_weighted_tf_per_year(d_ct_2_opinion_file_loc, default_true, leg_hist_count, file_suffix="d-ct-2-leg") #save_weighted_tf_per_year(d_ct_3_opinion_file_loc, default_true, leg_hist_count, file_suffix="d-ct-3-leg") save_weighted_tf_per_year(tax_ct_opinion_file_loc, default_true, textualist_count_ex_dictionaries, output_case_info=True) #save_weighted_tf_per_year(tax_ct_opinion_file_loc, default_true, leg_hist_yes, file_suffix="tax-ct-purp") #save_weighted_tf_per_year(tax_ct_opinion_file_loc, default_true, normative_yes, file_suffix="tax-ct-norm") #save_weighted_tf_per_year(tax_ct_opinion_file_loc, default_true, interpretive_yes, file_suffix="tax-ct-interp") #save_weighted_tf_per_year(tax_ct_opinion_file_loc, default_true, textualist_yes, file_suffix="tax-ct-text") #for list in tax_canon_list_of_lists: # # name = list[0] # tf_counter = Tf_counter(hit_list=list) # save_weighted_tf_per_year(tax_ct_opinion_file_loc, default_true, tf_counter.tf_list_count, file_suffix=name)
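
# Illustrative, hedged sketch (not part of the original runs above): include_only_if_true_function can be
# any callable that accepts opinion_text= and name= keyword arguments and returns a boolean, matching how
# it is invoked inside save_weighted_tf_per_year. The helper below shows one possible filter that restricts
# the scores to opinions authored by Democratic-appointed judges; the name "democrats_only" and the
# "tax-ct-text-ex-dict-dem" file suffix are hypothetical and not defined elsewhere in this project.
def democrats_only(opinion_text, name):
    # Relies only on the judge's surname; the opinion text is ignored.
    return is_democrat(name=name)

#save_weighted_tf_per_year(tax_ct_opinion_file_loc, democrats_only, textualist_count_ex_dictionaries,
#                          file_suffix="tax-ct-text-ex-dict-dem")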