这次的实验测试分为很多个小部分,由于个人能力有限,我只完成了前三个部分(最后一个是自行确定输出前多少个出现次数最多的单词)。其中第一个是统计文本中26个英文字母出现的次数与比例,并降序排序:
package piao;

import java.io.BufferedReader;
import java.io.FileReader;
import java.text.NumberFormat;

/**
 * Counts how often each of the 26 English letters occurs in a text file
 * (upper and lower case are counted together) and prints, for every letter
 * that occurs at least once, its count and its share of all letters, most
 * frequent letter first.
 */
public class text0 {

    /** Number of letters in the English alphabet; size of the count table. */
    private static final int LETTER_COUNT = 26;

    /**
     * Reads the input file character by character, tallies the letters and
     * prints the statistics.
     *
     * @param args unused
     * @throws Exception if the input file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        int[] count = new int[LETTER_COUNT];
        // try-with-resources closes the reader even when read() throws.
        try (BufferedReader br = new BufferedReader(new FileReader("D:\\java/eclipse/测试/piao.txt"))) {
            int ch;
            while ((ch = br.read()) != -1) {
                if (ch >= 'A' && ch <= 'Z') {
                    count[ch - 'A']++;
                } else if (ch >= 'a' && ch <= 'z') {
                    count[ch - 'a']++;
                }
            }
        }
        // The table is passed unsorted: its index is the only record of which
        // letter a count belongs to, so sorting it first would mislabel the output.
        Print(count);
    }

    /**
     * Sorts the given counts in place into descending order (bubble sort).
     *
     * <p>Only the values are reordered; after the call an index no longer
     * identifies a letter.
     *
     * @param count the values to sort; modified in place
     * @return the same array instance, now in descending order
     */
    public static int[] Paixu(int[] count) {
        int size = count.length;
        for (int i = 0; i < size - 1; i++) {
            for (int j = 0; j < size - 1 - i; j++) {
                if (count[j] < count[j + 1]) {
                    int temp = count[j];
                    count[j] = count[j + 1];
                    count[j + 1] = temp;
                }
            }
        }
        return count;
    }

    /**
     * Prints one line per letter with a non-zero count, in descending order of
     * count, formatted as {@code letter(count)(percentage%)}. Letters with equal
     * counts are printed in alphabetical order.
     *
     * <p>NOTE(review): limiting the percentage to two fraction digits is an
     * assumed choice; adjust if a different precision is required.
     *
     * @param count occurrences per letter, where index 0 is 'a' and index 25 is 'z';
     *              not modified
     */
    public static void Print(int[] count) {
        long total = 0;
        for (int occurrences : count) {
            total += occurrences;
        }

        // order[k] is the letter index holding the k-th largest count. Sorting the
        // indices instead of the counts keeps each count attached to its letter.
        int[] order = new int[count.length];
        for (int i = 0; i < order.length; i++) {
            order[i] = i;
        }
        // Insertion sort: stable, so equal counts stay in alphabetical order.
        for (int i = 1; i < order.length; i++) {
            int current = order[i];
            int j = i - 1;
            while (j >= 0 && count[order[j]] < count[current]) {
                order[j + 1] = order[j];
                j--;
            }
            order[j + 1] = current;
        }

        NumberFormat percent = NumberFormat.getNumberInstance();
        percent.setMaximumFractionDigits(2);
        for (int index : order) {
            // total is non-zero whenever any count is positive, so the division is safe.
            if (count[index] > 0) {
                char lowerCase = (char) (index + 'a');
                String share = percent.format(count[index] * 100.0 / total);
                System.out.println(lowerCase + "(" + count[index] + ")" + "(" + share + "%)");
            }
        }
    }
}
第二部分是统计所有单词出现的次数并降序排序:
package piao;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Counts how often every word occurs in a text file and prints all words in
 * descending order of their count. A word is a maximal run of ASCII letters;
 * words that differ only in case are counted separately.
 */
public class text1 {

    /**
     * Reads the input file, tallies the words and prints one
     * {@code word :count} line per distinct word, most frequent first.
     *
     * @param args unused
     * @throws Exception if the input file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        StringBuilder buffer = new StringBuilder();
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader re = new BufferedReader(new FileReader("D:\\java/eclipse/测试/piao.txt"))) {
            String line;
            while ((line = re.readLine()) != null) {
                // readLine() strips the line terminator; put one back so the last
                // word of a line is not glued to the first word of the next line.
                buffer.append(line).append('\n');
            }
        }

        Pattern expression = Pattern.compile("[a-zA-Z]+"); // matches one word
        Matcher matcher = expression.matcher(buffer.toString());

        // TreeMap keeps the words in alphabetical order.
        Map<String, Integer> map = new TreeMap<String, Integer>();
        while (matcher.find()) {
            String word = matcher.group();
            Integer times = map.get(word); // null when the word is seen for the first time
            map.put(word, times == null ? 1 : times + 1);
        }

        // Sort ascending by count; the list is then walked backwards to print
        // the most frequent words first.
        List<Map.Entry<String, Integer>> list =
                new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
        Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                return left.getValue().compareTo(right.getValue());
            }
        });

        // Go all the way down to index 0 so the least frequent word is printed too.
        for (int i = list.size() - 1; i >= 0; i--) {
            Map.Entry<String, Integer> entry = list.get(i);
            System.out.println(entry.getKey() + " :" + entry.getValue());
        }
    }
}
第三部分是自行确定输出前多少个出现次数最多的单词:
package piao;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Counts how often every word occurs in a text file and prints the {@code n}
 * most frequent words, where {@code n} is read from standard input. A word is
 * a maximal run of ASCII letters; words that differ only in case are counted
 * separately.
 */
public class text2 {

    /**
     * Reads the input file, tallies the words, asks the user for {@code n} and
     * prints one {@code word :count} line for each of the {@code n} most
     * frequent words. If {@code n} exceeds the number of distinct words, all
     * words are printed; if it is zero or negative, nothing is printed.
     *
     * @param args unused
     * @throws Exception if the input file cannot be opened or read, or if the
     *                   value entered on standard input is not an integer
     */
    public static void main(String[] args) throws Exception {
        StringBuilder buffer = new StringBuilder();
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader re = new BufferedReader(new FileReader("D:\\java/eclipse/测试/piao.txt"))) {
            String line;
            while ((line = re.readLine()) != null) {
                // readLine() strips the line terminator; put one back so the last
                // word of a line is not glued to the first word of the next line.
                buffer.append(line).append('\n');
            }
        }

        Pattern expression = Pattern.compile("[a-zA-Z]+"); // matches one word
        Matcher matcher = expression.matcher(buffer.toString());

        // TreeMap keeps the words in alphabetical order.
        Map<String, Integer> map = new TreeMap<String, Integer>();
        while (matcher.find()) {
            String word = matcher.group();
            Integer times = map.get(word); // null when the word is seen for the first time
            map.put(word, times == null ? 1 : times + 1);
        }

        // Sort ascending by count; the list is then walked backwards to print
        // the most frequent words first.
        List<Map.Entry<String, Integer>> list =
                new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
        Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                return left.getValue().compareTo(right.getValue());
            }
        });

        // Left open on purpose: closing the Scanner would also close System.in.
        @SuppressWarnings("resource")
        Scanner in = new Scanner(System.in);
        System.out.println("输入前n个最常出现的单词:");
        int n = in.nextInt();

        // Clamp the request to what is available so a large n cannot push the
        // index below zero and a negative n prints nothing.
        int shown = Math.max(0, Math.min(n, list.size()));
        int last = list.size() - 1;
        for (int i = last; i > last - shown; i--) {
            Map.Entry<String, Integer> entry = list.get(i);
            System.out.println(entry.getKey() + " :" + entry.getValue());
        }
    }
}