# wordfreq.awk --- print list of word frequencies # from http://www.sunsite.ualberta.ca/Documentation/Gnu/gawk-3.1.0/html_chapter/gawk_15.html#SEC220 { $0 = tolower($0) # remove case distinctions # remove punctuation gsub(/[^[:alnum:]_[:blank:]]/, "", $0) for (i = 1; i <= NF; i++) freq[$i]++ } END { for (word in freq) printf "%-20s\t%4d\n", word, freq[word] }