"""
This Python program reads in a file specified by the user and gives some
information about word frequency.  It gives the number of words in the file;
it also gives the ten most common words and their respective frequencies.

NOTE: there are easier and more efficient ways to accomplish this, but all
of the below can be done using the tools from CS21.

Author: Zachary Palmer
Date: 2015-10-21
"""

def get_filename():
  """
  Prompts the user for the name of the file to analyze and returns the text
  they typed.
  """
  prompt = "What file would you like to examine? "
  answer = raw_input(prompt)
  return answer

def get_words(filename):
  """
  Reads every whitespace-separated word from a file.

  filename is the path of the text file to read.  The words are returned as a
  list, in the order in which they appear in the file; a file with no words
  produces an empty list.  Any error raised by open() (for instance, because
  the file does not exist) is passed along to the caller.
  """
  words = []
  # The with statement guarantees the file is closed, even if reading fails
  # part of the way through.
  with open(filename, "r") as f:
    for line in f:
      # split() with no arguments already ignores leading and trailing
      # whitespace, including the newline at the end of the line.
      words.extend(line.split())
  return words

def find_word_in_list(word, word_list):
  """
  Locates a word within a list of words.  The result is the index of the first
  entry equal to the word, or -1 when no entry matches.
  """
  for position, candidate in enumerate(word_list):
    if candidate == word:
      return position
  return -1

def group_words(words, word_list, count_list):
  """
  Group up matching words, putting the results in word_list and count_list.
  For every word in words, that word is added to word_list if it was not already
  there.  count_list represents the number of times a given word has appeared,
  so the corresponding index describes the number of that word we have found.
  For instance, if word_list[4] is "house" and count_list[4] is 9, then we have
  seen the word "house" 9 times.  These two lists form a sort of an accumulator
  for the word counting loop, so they may already hold results from an earlier
  call.

  The words must be hashable (strings are), because a dictionary is used to
  remember where each word lives in word_list.

  Note that this function does not return anything; it changes the lists it is
  provided.
  """
  # Remember the index of every word we already know about.  Looking a word up
  # in this dictionary takes the same time no matter how many distinct words
  # there are, whereas searching word_list for each word gets slower and slower
  # as the list grows.
  positions = {}
  for i, known in enumerate(word_list):
    # setdefault keeps the earliest index if word_list happens to contain the
    # same word twice, which is what a front-to-back search would find.
    positions.setdefault(known, i)
  for word in words:
    position = positions.get(word, -1)
    if position == -1:
      # First sighting: the word goes at the end of both lists.
      positions[word] = len(word_list)
      word_list.append(word)
      count_list.append(1)
    else:
      count_list[position] += 1

def sum_list(a_list):
  """
  Adds up all the values in a list and returns the total.  An empty list adds
  up to 0.
  """
  # The built-in sum() starts from 0 and adds each value in order, which is
  # exactly the accumulator loop one would otherwise write by hand.
  return sum(a_list)

def find_largest_index(count_list):
  """
  Determines the index of the largest value in the list.  If the largest value
  appears more than once, the index of its first appearance is returned.  If
  the list is empty, -1 is returned instead.
  """
  index = -1
  for n, value in enumerate(count_list):
    # The first value is always accepted as the best so far.  Comparing against
    # a real element rather than a made-up starting value means lists whose
    # values are all negative are handled correctly too.
    if index == -1 or value > count_list[index]:
      index = n
  return index

def remove_index(index, a_list):
  """
  Removes the specified index from the list.  The list which was provided is
  not changed; a new list without that element is returned instead.

  If the index does not refer to an element (it is negative, such as the -1
  used elsewhere in this file to mean "not found", or it is past the end of
  the list), the result is simply a copy of the whole list.
  """
  if index < 0:
    # Without this check, a_list[:index] would count from the end of the list
    # and a_list[index+1:] would start over near the front, so the result
    # would contain elements twice.
    return a_list[:]
  return a_list[:index] + a_list[index+1:]

def main():
  """
  Asks the user for a file, then prints the total number of words in it
  followed by its most common words (at most ten) and how often each appears.
  """
  filename = get_filename()
  words = get_words(filename)
  word_list = []
  count_list = []
  group_words(words, word_list, count_list)
  # Each print below is given a single parenthesized value, so the line means
  # the same thing whether print is a statement or a function.
  print("Total words: %d" % (sum_list(count_list)))
  print("Common words: ")
  # A file may hold fewer than ten different words (or none at all), so never
  # ask for more entries than there are; otherwise the lists run empty and
  # indexing them fails.
  for n in range(min(10, len(word_list))):
    i = find_largest_index(count_list)
    print("  %20s     %6d" % (word_list[i], count_list[i]))
    # Drop the word just reported so the next pass finds the next most common.
    word_list = remove_index(i, word_list)
    count_list = remove_index(i, count_list)

# Only start the interactive program when this file is run directly.  Without
# this check, merely importing the file (for example, to reuse or test one of
# its functions) would stop and ask the user for a filename.
if __name__ == "__main__":
  main()

