一溪风月
一溪风月
Published on 2023-05-10 / 27 Visits
0

KMP算法解决字符串匹配问题(Java)

KMP算法介绍

KMP算法的核心是利用匹配失败后的信息,尽量减少模式串与主串的匹配次数以达到快速匹配的目的。具体实现就是通过一个next()函数实现,函数本身包含了模式串的局部匹配信息。KMP算法的时间复杂度O(m+n)。

应用实例

案例:给定字符串str1 = "我爱北京天安门我爱中华我爱中国我爱共产党",字符串str2 = "我爱中国";要求匹配出str2在str1中第一次出现的位置。

解法一:暴力匹配法

代码实现

package com.athome.kmp;

/**
 * Description:
 * Author:江洋大盗
 * Date:2021/1/17 21:17
 */
public class ViolentSolution {
    public static void main(String[] args) {
        String str1 = "ABBCADDAAADDBC";
        String str2 = "ADDB";
        int i = violentSolution(str1, str2);
        System.out.println("第一次出现的角标为:" + i);

    }

    /**
     * @param str1 跟字串匹配的母串
     * @param str2 待匹配的字子符串
     * @return 返回字串第一次在母串中出现的位置,若查询不到返回-1
     */
    public static int violentSolution(String str1, String str2) {
        if (str1 == null || str2 == null) {
            throw new RuntimeException("参数传入有误!!!");
        }

        int str1Len = str1.length();
        int str2Len = str2.length();

        int i = 0;//记录str1的字符
        int j = 0;//记录str2的字符
        while (i < str1Len && j < str2Len) {
            if (str1.charAt(i) == str2.charAt(j)) {
                i++;
                j++;
            } else {
                //如果不相等,将i重置为刚开始匹配的下一个元素,j重置为0
                i = i - j + 1;
                j = 0;
            }
        }
        if (j == str2Len) {
            return i - j;
        } else {
            return -1;
        }
    }
}

解法二:KMP算法

代码实现

package com.athome.kmp;

import java.util.Arrays;

/**
 * Description:
 * Author:江洋大盗
 * Date:2021/1/17 21:37
 */
public class KMPSolution {
    public static void main(String[] args) {
        String str1 = "我爱北京天安门我爱中华我爱中国我爱共产党";
        String str2 = "我爱中国";
        int i = kmpSolution(str1, str2);
        System.out.println(i);
    }

    public static int kmpSolution(String str1, String str2) {
        int[] next = kmpNext(str2);
        for (int i = 0, j = 0; i < str1.length(); i++) {
            while (j > 0 && str1.charAt(i) != str2.charAt(j)) {
                j = next[j - 1];
            }
            if (str1.charAt(i) == str2.charAt(j)) {
                j++;
            }
            if (j == str2.length()) {
                return i - j + 1;
            }
        }
        return -1;
    }

    private static int[] kmpNext(String dest) {
        int[] next = new int[dest.length()];
        next[0] = 0;
        for (int i = 1, j = 0; i < dest.length(); i++) {
            while (j > 0 && dest.charAt(i) != dest.charAt(j)) {
                j = next[j - 1];
            }
            if (dest.charAt(i) == dest.charAt(j)) {
                j++;
            }
            next[i] = j;
        }
        return next;
    }
}

拓展

案例一:给定字符串str1 = "abvurhellovjdfapplelkvwert",字符串str2 = "vsdkjbvlrkhellosbg",找出两个字符串中的最大相同字串。

案例二:给定字符串str1 = "abvurhellovjdfapplelkvwert",字符串str2 = "vsdkjapplebvlrkhellosbg",找出两个字符串中的所有最大相同字串。

package com.athome.kmp;

import java.util.ArrayList;
import java.util.List;

/**
 * Description:
 * Author:江洋大盗
 * Date:2021/1/17 22:28
 */
public class IdenticalSubstring {
    public static void main(String[] args) {
        String str1 = "abvurhellovjdfapplelkvwert";
        String str2 = "vsdkapplejbvlrkhellosbg";
        List<String> allMaxSubstring = getAllMaxSubstring(str1, str2);
        System.out.println(allMaxSubstring);
    }

    public static String getMaxSubstring(String str1, String str2) {
        if (str1 == null || str2 == null) {
            throw new RuntimeException("参数传入有误");
        }
        String maxStr = (str1.length() > str2.length()) ? str1 : str2;
        String minStr = (str1.length() <= str2.length()) ? str1 : str2;
        int length = minStr.length();
        String maxSamePart = null;
        for (int i = 0; i < length; i++) {
            for (int j = 0, k = length - i; k <= length; j++, k++) {
                maxSamePart = minStr.substring(j, k);
                if (maxStr.contains(maxSamePart)) {
                    return maxSamePart;
                }
            }
        }
        return null;
    }

    public static List<String> getAllMaxSubstring(String str1, String str2) {
        if (str1 == null || str2 == null) {
            throw new RuntimeException("参数传入有误");
        }
        String maxStr = (str1.length() > str2.length()) ? str1 : str2;
        String minStr = (str1.length() <= str2.length()) ? str1 : str2;
        int length = minStr.length();
        String maxSamePart;
        int maxSize = 0;//记录最大值
        List<String> list = new ArrayList<>();
        label:
        for (int i = 0; i < length; i++) {
            for (int j = 0, k = length - i; k <= length; j++, k++) {
                maxSamePart = minStr.substring(j, k);
                if (maxStr.contains(maxSamePart)) {
                    if (maxSize == 0) {
                        maxSize = maxSamePart.length();
                    }
                    if (maxSamePart.length() < maxSize) {
                        break label;
                    }
                    list.add(maxSamePart);
                }
            }
        }
        return list;
    }
}

结语

只要能收获甜蜜,荆棘丛中也有蜜蜂忙碌的身影,未来的你一定会感谢现在努力的自己。