Skip to content

26999: 2023找出全部子串位置

string, naive, kmp algorithm, http://cs101.openjudge.cn/practice/26999/

输入两个串 txt, pat,找出 pat 在 txt 中所有出现的位置

例如'aa'在 aaaa 里出现的位置有0,1,2

输入

第一行是整数n 接下来有n行,每行两个不带空格的字符串 txt, pat。0 < len(pat) <= len(txt) < 2* 10^7

输出

对每行,从小到大输出 pat 在 txt 中所有的出现位置。位置从0开始算 如果 pat 没出现过,输出 "no" 行末多输出空格没关系

样例输入

4
ababcdefgabdefab ab
aaaaaaaaa a
aaaaaaaaa aaa 
112123323 a

样例输出

0 2 9 14 
0 1 2 3 4 5 6 7 8 
0 1 2 3 4 5 6 
no
python
n = int(input())
for _ in range(n):
    s1, s2 = input().split()
    positions = []
    start = 0
    while True:
        pos = s1.find(s2, start)
        if pos == -1:
            break
        positions.append(pos)
        start = pos + 1
    if positions:
        for pos in positions:
            print(pos, end=" ")
        print("")
    else:
        print("no")

Naive pattern algorithm, Time Limit Exceeded

python
# https://www.geeksforgeeks.org/naive-algorithm-for-pattern-searching/
# Naive Pattern Searching algorithm
def search(pat, txt):
    M = len(pat)
    N = len(txt)

    res = []
    # A loop to slide pat[] one by one */
    for i in range(N - M + 1):
    #i = 0
    #while i < N - M + 1:
        j = 0

        # For current index i, check
        # for pattern match */
        while(j < M):
            if (txt[i + j] != pat[j]):
                #i = i+j + 1
                break
            j += 1

        if (j == M):
            res.append(str(i))
            #i += M

    return res


n = int(input())
for _ in range(n):
    txt, pat = input().split()
    ans = search(pat, txt)
    if ans:
        print(' '.join(ans))
    else:
        print('no')
python
# https://www.geeksforgeeks.org/kmp-algorithm-for-pattern-searching/
# KMP Algorithm
def KMPSearch(pat, txt):
    M = len(pat)
    N = len(txt)
    
    res = []

    # create lps[] that will hold the longest prefix suffix
    # values for pattern
    lps = [0]*M
    j = 0 # index for pat[]

    # Preprocess the pattern (calculate lps[] array)
    computeLPSArray(pat, M, lps)

    i = 0 # index for txt[]
    while (N - i) >= (M - j):
        if pat[j] == txt[i]:
            i += 1
            j += 1

        if j == M:
            #print("Found pattern at index " + str(i-j))
            cur = i - j
            res.append(str(cur))

            j = lps[j-1]

        # mismatch after j matches
        elif i < N and pat[j] != txt[i]:
            # Do not match lps[0..lps[j-1]] characters,
            # they will match anyway
            if j != 0:
                j = lps[j-1]
            else:
                i += 1
    
    return res


# Function to compute LPS array
def computeLPSArray(pat, M, lps):
    len = 0 # length of the previous longest prefix suffix

    lps[0] = 0 # lps[0] is always 0
    i = 1

    # the loop calculates lps[i] for i = 1 to M-1
    while i < M:
        if pat[i] == pat[len]:
            len += 1
            lps[i] = len
            i += 1
        else:
            # This is tricky. Consider the example.
            # AAACAAAA and i = 7. The idea is similar
            # to search step.
            if len != 0:
                len = lps[len-1]

                # Also, note that we do not increment i here
            else:
                lps[i] = 0
                i += 1


# Driver code
# if __name__ == '__main__':
#     txt = "ABABDABACDABABCABAB"
#     pat = "ABABCABAB"
#     KMPSearch(pat, txt)
# This code is contributed by Bhavya Jain
n = int(input())
for _ in range(n):
    txt, pat = input().split()
    ans = KMPSearch(pat, txt)
    if ans:
        print(' '.join(ans))
    else:
        print('no')