JavaAlgorithms/Others/TopKWords.java

87 lines
2.5 KiB
Java
Raw Normal View History

package Others;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
2018-04-05 05:49:14 +08:00
/**
 * Displays the K most frequent words in a text file together with the number of times each one
 * occurs, most frequent first.
 *
 * <p>A word is a maximal run of letters. Case is ignored, and every non-letter character
 * (periods, digits, whitespace, ...) acts as a separator.
 */
public class TopKWords {

  /** File read by {@link #main(String[])} when no path is given on the command line. */
  private static final String DEFAULT_FILE = "/Users/lisanaaa/Desktop/words.txt";

  /** Builds a word-frequency table from a single text file. */
  static class CountWords {
    private final String fileName;

    /**
     * @param fileName path of the text file whose words are counted
     */
    public CountWords(String fileName) {
      this.fileName = fileName;
    }

    /**
     * Reads the file (decoded as UTF-8) and counts how often each word occurs.
     *
     * @return a map from lower-cased word to its number of occurrences, or {@code null} if the
     *     file could not be read (the failure is reported on {@code System.err})
     */
    public Map<String, Integer> getDictionary() {
      Map<String, Integer> dictionary = new HashMap<>();

      // Decode through a buffered Reader so multi-byte characters stay intact and the file is
      // not hit with one system call per byte; try-with-resources closes it on every path.
      try (Reader reader =
          new BufferedReader(
              new InputStreamReader(new FileInputStream(fileName), StandardCharsets.UTF_8))) {
        StringBuilder word = new StringBuilder();
        int in;
        while ((in = reader.read()) != -1) {
          if (Character.isLetter((char) in)) {
            word.append((char) in);
          } else {
            // A separator ends the word collected so far (if any).
            recordWord(dictionary, word);
          }
        }
        // The file may end in the middle of a word; count that last word too.
        recordWord(dictionary, word);
        return dictionary;
      } catch (IOException e) {
        System.err.println("Could not read " + fileName + ": " + e);
        return null;
      }
    }

    /** Adds the pending word (lower-cased) to the counts and clears the buffer; no-op if empty. */
    private static void recordWord(Map<String, Integer> dictionary, StringBuilder word) {
      if (word.length() > 0) {
        dictionary.merge(word.toString().toLowerCase(Locale.ROOT), 1, Integer::sum);
        word.setLength(0);
      }
    }
  }

  /**
   * Reads K from standard input and prints the K most frequent words as {@code word=count}
   * lines. While the number entered exceeds the number of distinct words, asks for another.
   *
   * @param args optional; {@code args[0]} is the path of the file to analyse, otherwise the
   *     built-in default path is used
   */
  public static void main(String[] args) {
    String path = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE;
    Map<String, Integer> dictionary = new CountWords(path).getDictionary();
    if (dictionary == null) {
      // getDictionary already reported the I/O failure; there is nothing to rank.
      System.err.println("No word counts available for " + path);
      return;
    }

    // Copy the entries into a list so they can be sorted: highest count first, ties broken
    // alphabetically so the output does not depend on HashMap iteration order.
    List<Map.Entry<String, Integer>> ranked = new ArrayList<>(dictionary.entrySet());
    ranked.sort(
        Map.Entry.<String, Integer>comparingByValue()
            .reversed()
            .thenComparing(Map.Entry.<String, Integer>comparingByKey()));

    // A single Scanner is reused for every attempt: creating a new one per retry would lose
    // whatever input the previous Scanner had already buffered.
    try (Scanner input = new Scanner(System.in)) {
      int k = input.nextInt();
      while (k > ranked.size()) {
        System.out.println("Retype a number, your number is too large");
        k = input.nextInt();
      }
      for (int i = 0; i < k; i++) {
        System.out.println(ranked.get(i));
      }
    }
  }
}