智能拨号匹配算法（二）

private ScoreAndHits completeMatch(String reg) { ScoreAndHits scoreAndHits = new ScoreAndHits(-1, 0f, new ArrayList<PointPair>()); for (int i = 0; i < fullNameNumberWithoutSpace.size(); i++) { String str = fullNameNumberWithoutSpace.get(i); if (reg.equals(str)) { scoreAndHits.nameIndex = i; scoreAndHits.score = Match_Level_Complete; scoreAndHits.pairs.add(new PointPair(i, -1)); scoreAndHits.matchLevel = Level_Complete; return scoreAndHits; } } for (int i = 0; i < phones.size(); i++) { PhoneStruct phone = phones.get(i); if (reg.equals(phone.phoneNumber)) { scoreAndHits.nameIndex = i; scoreAndHits.score = Match_Level_Complete; scoreAndHits.pairs.add(new PointPair(i, -1)); scoreAndHits.matchType = Match_Type_Phone; scoreAndHits.matchLevel = Level_Complete; return scoreAndHits; } } // 走到这里说明没有匹配 return new ScoreAndHits(-1, 0f, new ArrayList<PointPair>()); }

private ScoreAndHits foreAcronymOverFlowMatch(String reg) { // 因为有可能是多音字，所以这个方法用来对比不同拼音的匹配度，并取最大的那个 ScoreAndHits scoreAndHits = new ScoreAndHits(-1, 0f, new ArrayList<PointPair>()); for (int i = 0; i < fullNameNumber.size(); i++) { ArrayList<String> names = fullNameNumber.get(i); ScoreAndHits tmpscore = foreAcronymOverFlowMatch(names, reg); if (tmpscore.score > scoreAndHits.score) { scoreAndHits = tmpscore; scoreAndHits.nameIndex = i; } } scoreAndHits.matchLevel = Level_Fore_Acronym_Overflow; return scoreAndHits; } // 在第一个字母确定的情况下，第二个字母有可能有三种情况 // 一、在第一个字母所在单词的邻居位置charAt(x+1); // 二、在第二个单词的首字母处 // 三、以上两种情况皆不符合，不匹配，出局 private ScoreAndHits foreAcronymOverFlowMatch(ArrayList<String> names, String reg) { // 用来得出某一个拼音的匹配值。 ScoreAndHits scoreAndHits = new ScoreAndHits(-1, 0f, new ArrayList<PointPair>()); if (names.get(0).charAt(0) == reg.charAt(0)) { //其实crossWords()方法才是求匹配值的方法，lol OverflowMatchValue value = crossWords(names, reg, 0, 0, 0); int cross = crossWords(names, reg, 0, 0, 0).crossed; if (cross > 0) { scoreAndHits.score = Match_Level_Fore_Acronym_Overflow + cross * Match_Score_Reward - (names.size() - cross) * Match_Miss_Punish; scoreAndHits.pairs = value.pairs; } } return scoreAndHits; } /** * 返回一串字符能跨越另一串字符的长度，根据上面的匹配规则，要尽可能的多匹配单词。若要保证 * 能匹配最长的长度，只要保证下一个字符开始的一段字符能匹配最长的长度即可，换名话说， * 如果想要让96758匹配最长的字符串，那么只要保证6758能匹配最长的字符串即可， * 然后758，再然后58……。例如，名字叫PanAnNing，输入pan，那么应该匹配三个首字母， * PAN,而不是第一姓的拼音Pan.这是一个递归。 * * * @param names * @param regString * 匹配字符串 * @param listIndex * 匹配到的list的第listIndex个单词 * @param strIndex * 匹配到第listIndex个单词中的第strIndex个字母 * @param regIndex * regchar的匹配位置，比如匹配到了96758的7上，也就是regIndex==2. * @return */ private OverflowMatchValue crossWords(ArrayList<String> names, String regString, int listIndex, int strIndex, int regIndex) { //在进入此方法时，第listIndex个单词的第strIndex的字母肯定是 // 与regString的第regIndex个字母相等的 OverflowMatchValue result = new OverflowMatchValue(0, false); OverflowMatchValue reser = new OverflowMatchValue(0, false);//返回如果匹配到本单词的下一个字母能得到的匹配值 OverflowMatchValue impul = new OverflowMatchValue(0, false);//返回如果匹配到下一个单词的第一个字母的匹配值 // 仍然以【名字叫PanAnNing，输入pan(其实对比的是数字，这里转化成字母为了方便)】举例 // 假设这时listIndex，strIndex，regIndex都是0，所以现在匹配的是p字母，它毫无疑问对应姓名的第一个P， // 那么下一步应该怎么做呢，由上面所说【保证下一个字符开始的一段字符能匹配最长的长度即可】 // 也就是说，我们输入的pan中的第二个字母a匹配哪个位置将得到最优结果。这个盒子中显然有两种情况。 // 一是匹配姓氏Pan中的a，另一个是匹配名字AnNing中的A。 // reser就表示如果a匹配到Pan中的a最终的匹配值。 // impul就表示如果a匹配到AnNing中的A得到的最终的匹配值。 if (regIndex < regString.length() - 1) { //如果还没匹配到最后一个字母，也就是regString还没匹配到最后一个，那么将检测如 //果将regString的下一个字母放到哪里将得到最优结果 char nextChar = regString.charAt(regIndex + 1); if (listIndex < names.size() - 1 && nextChar == names.get(listIndex + 1).charAt(0)) { impul = crossWords(names, regString, listIndex + 1, 0, regIndex + 1); } if (strIndex < names.get(listIndex).length() - 1 && nextChar == names.get(listIndex).charAt(strIndex + 1)) { reser = crossWords(names, regString, listIndex, strIndex + 1, regIndex + 1); } //如果上面两个条件都不成立，那么就表示本次匹配失败 } else { result = new OverflowMatchValue((strIndex == 0) ? 1 : 0, true); result.pairs.add(0, new PointPair(listIndex, strIndex)); } if (reser.matched || impul.matched) { //如果其中任意一个方式可以匹配，那么结果最大的那个就是最优结果 if (impul.crossed > reser.crossed) { result = impul; } else { result = reser; } result.matched = true; result.crossed = ((strIndex == 0) ? 1 : 0) + Math.max(result.crossed, result.crossed); result.pairs.add(0, new PointPair(listIndex, strIndex)); } return result; } static class OverflowMatchValue { public int crossed = 0; public boolean matched = false; public ArrayList<PointPair> pairs = new ArrayList<PointPair>(); public OverflowMatchValue(int c, boolean m) { this.crossed = c; this.matched = m; } }

private ScoreAndHits backAcronymOverFlowMatch(String reg) { //跟上面差不多 ScoreAndHits scoreAndHits = new ScoreAndHits(-1, 0f, new ArrayList<PointPair>()); for (int i = 0; i < fullNameNumber.size(); i++) { ArrayList<String> names = fullNameNumber.get(i); ScoreAndHits tmp = backAcronymOverFlowMatch(names, reg); if (tmp.score > scoreAndHits.score) { scoreAndHits = tmp; scoreAndHits.nameIndex = i; } } scoreAndHits.matchLevel = Level_Back_Acronym_Overflow; return scoreAndHits; } private ScoreAndHits backAcronymOverFlowMatch(ArrayList<String> names, String reg) { int score = 0; int punish = 0; ScoreAndHits scoreAndHits = new ScoreAndHits(-1, 0f, new ArrayList<PointPair>()); // 有可能会调用多次crossWords，取决于名字的长度。这是跟前面的不同 for (int i = 0; i < names.size(); i++) { String string = (String) names.get(i); if (string.charAt(0) == reg.charAt(0)) { OverflowMatchValue value = crossWords(names, reg, i, 0, 0); int cross = value.crossed; int lost = names.size() - cross; if (cross > score || cross == score && punish > lost) { scoreAndHits.pairs = value.pairs; score = cross; punish = lost; } } } if (score > 0) { scoreAndHits.score = Match_Level_Back_Acronym_Overflow + score * Match_Score_Reward - punish * Match_Miss_Punish; return scoreAndHits; } else { return new ScoreAndHits(-1, 0f, new ArrayList<PointPair>()); } }

private ScoreAndHits backHeadlessParagraphMatch(String reg) { // TODO，如果此人有两个相似的号码，那么就只能匹配出一个来了，这是很显然不对的 int punish = 0; ScoreAndHits scoreAndHits = new ScoreAndHits(-1, -1f, new ArrayList<PointPair>()); scoreAndHits.matchLevel = Level_Headless; scoreAndHits.matchType = Match_Type_Phone; // 不匹配姓名 for (int i = 0; i < phones.size(); i++) { PhoneStruct phone = phones.get(i); int sco = phone.phoneNumber.indexOf(reg); if (sco >= 0) { int lost = phone.phoneNumber.length() - reg.length(); if (scoreAndHits.score < sco || sco == scoreAndHits.score && punish > lost) { scoreAndHits.score = sco; scoreAndHits.nameIndex = i; punish = lost; } //pairs.add放到判断外面是因为有可能匹配到同一个人的多个手机号码。 scoreAndHits.pairs.add(new PointPair(i, sco)); } } if (scoreAndHits.score >= 0) { scoreAndHits.score = Match_Level_Headless - scoreAndHits.score * Match_Score_Reward - punish * Match_Miss_Punish; } return scoreAndHits; } //表示电话号码的一个静态类，将过滤掉开头的+86以及系统可能自动生成的“-”以及其他非数字的字符以便于搜索 public static class PhoneStruct { public String phoneNumber; public int phoneType; public String displayType; public PhoneStruct(String number, int type) { phoneNumber = number.replaceAll("^\\+86", "").replaceAll("[\\]+", ""); phoneType = type; } }

相关阅读