找到一个单词的所有相似单词的Java实现

0
Java .NET 算法 C/C++ Go 5183 次浏览

相似单词为 只差一位字母的单词,练习Map容器

    package chapter4;  
      
    import java.util.*;  
    import java.util.Map.Entry;  
      
    /* 
     * 说明:找到一个单词的所有相似单词 例如: wine 和 dine wind 和wing 只有一个字母不同 
     */  
    public class TreeMapTest {  
      
        /* 
         * 判断2个单词是否指差一个字母 
         */  
        public static boolean oneCharOff(String s1, String s2) {  
            if (s1.length() != s2.length())  
                return false;  
            int diff = 0;  
            for (int i = 0; i < s1.length() - 1; i++) {  
                if (s1.charAt(i) != s2.charAt(i))  
                    diff++;  
                if (diff > 1)  
                    return false;  
            }  
      
            return diff == 1;  
        }  
      
        /* 
         * 打印方法 
         */  
        public static void print(Map<String, List<String>> map) {  
            for (Map.Entry<String, List<String>> entry : map.entrySet()) {  
                List<String> words = entry.getValue();  
                System.out.print(entry.getKey() + ":");  
                for (int i = 0; i < words.size(); i++) {  
                    System.out.print(words.get(i) + " ");  
                }  
                System.out.println();  
            }  
        }  
      
        /** 
         * 方法名:computeWords 说明:方法1 
         */  
        public static Map<String, List<String>> computeWords1(List<String> words) {  
      
            Map<String, List<String>> map = new TreeMap<String, List<String>>();  
            String[] word = new String[words.size()];  
            words.toArray(word);  
            for (int i = 0; i < word.length; i++) {  
                for (int j = i + 1; j < word.length; j++) {  
                    if (oneCharOff(word[i], word[j])) {  
                        update(map, word[i], word[j]);// 互为相似单词  
                        update(map, word[j], word[i]);  
                    }  
                }  
            }  
      
            return map;  
        }  
      
        /** 
         * 方法名:update 说明:更新 
         */  
        private static <KeyType> void update(Map<KeyType, List<String>> map,  
                KeyType key, String s) {  
            List<String> words = map.get(key);  
            if (words == null) {  
                words = new ArrayList<String>();  
                map.put(key, words);  
            }  
            words.add(s);  
        }  
      
        /** 
         * 方法名:groupByLength 说明:先将给的单词按照长度分组 
         */  
        private static Map<Integer, List<String>> groupByLength(List<String> words) {  
            Map<Integer, List<String>> map = new TreeMap<Integer, List<String>>();  
            for (String s : words)  
                update(map, s.length(), s);  
            return map;  
        }  
      
        /** 
         * 方法名:computeWord 说明:方法2 
         */  
        public static Map<String, List<String>> computeWords2(List<String> words) {  
      
            Map<String, List<String>> map = new TreeMap<String, List<String>>();  
      
            for (Entry<Integer, List<String>> entry : groupByLength(words)  
                    .entrySet()) {  
                String[] word = new String[entry.getValue().size()];  
                entry.getValue().toArray(word);  
                for (int i = 0; i < word.length; i++) {  
                    for (int j = i + 1; j < word.length; j++) {  
                        if (oneCharOff(word[i], word[j])) {  
                            update(map, word[i], word[j]);// 互为相似单词  
                            update(map, word[j], word[i]);  
                        }  
                    }  
                }  
            }  
            return map;  
      
        }  
      
        /** 
         * 方法名:computeWords3  
         * 说明:该方法效率最高。首先也是按照长度分组,分组完了之后,对每组分别做以下操作: 
         * 1:从头到尾分别去掉每个单词的一位字母。将剩下的作为一个键,该单词作为值 放到新建的 
         * map<String,List<String>>reToWord里 
         * 2:遍历reToWord,找到size>=2的,(因为只有>=2的 才代表含有相似的)例如wine和wane 当去掉第2位 
         * 时,首先wine会进List,wane匹配到了wne也会进去,所以是2个 
         */  
        public static Map<String, List<String>> computeWords3(List<String> words) {  
      
            Map<String, List<String>> adjWords = new TreeMap<String, List<String>>();  
            Map<Integer, List<String>> wordsByLength = new TreeMap<Integer, List<String>>();  
            for (String w : words)  
                update(wordsByLength, w.length(), w);  
      
            for (Map.Entry<Integer, List<String>> entry : wordsByLength.entrySet()) {  
                List<String> groupsWords = entry.getValue();  
                int groupNum = entry.getKey();  
                for (int i = 0; i < groupNum; i++) {  
                    Map<String, List<String>> repToWord = new TreeMap<String, List<String>>();  
                    for (String str : groupsWords) {  
                        String rep = str.substring(0, i) + str.substring(i + 1);  
                        update(repToWord, rep, str);  
                    }  
                    for (List<String> wordClique : repToWord.values())  
                        if (wordClique.size() >= 2)  
                            for (String s1 : wordClique)  
                                for (String s2 : wordClique)  
                                    if (s1 != s2)  
                                        update(adjWords, s1, s2);  
                }  
            }  
            return adjWords;  
        }  
      
        public static void main(String[] args) {  
            // TODO Auto-generated method stub  
            List<String> list = new ArrayList<String>();  
            list.add("wane");  
            list.add("wine");  
            list.add("aine");  
            list.add("dine");  
            list.add("anew");  
            list.add("kine");  
            list.add("qine");  
            list.add("eine");  
            list.add("rine");  
      
            print(computeWords1(list));  
            System.out.println();  
            print(computeWords2(list));  
            System.out.println();  
            print(computeWords3(list));  
        }  
      
    }  
来自:http://blog.csdn.net/xiuweikang/article/details/40626493
请尽量让自己的答案能够对别人有帮助

0个答案

默认排序 按投票排序