94 lines
2.0 KiB
Python
94 lines
2.0 KiB
Python
|
#!/usr/bin/python
|
|||
|
# -*- coding: UTF-8 -*-
|
|||
|
|
|||
|
from time import time
|
|||
|
|
|||
|
|
|||
|
def bf(main, pattern):
    """
    Find the first occurrence of ``pattern`` in ``main`` by brute force.

    Every candidate start position in ``main`` is tried in order, and the
    pattern is compared against the text character by character.

    :param main: the text to search in
    :param pattern: the string to search for
    :return: index of the first match in ``main``, or -1 if there is none;
        an empty pattern matches at index 0
    """
    n = len(main)
    m = len(pattern)

    # An empty pattern matches at the very start, as str.find does. Without
    # this guard the inner loop never runs and the search would report -1.
    if m == 0:
        return 0

    # The pattern is at least as long as the text: only an exact match fits.
    if n <= m:
        return 0 if pattern == main else -1

    for i in range(n - m + 1):
        for j in range(m):
            if main[i + j] != pattern[j]:
                break
        else:
            # The inner loop finished without a mismatch: full match at i.
            return i
    return -1
|
|||
|
|
|||
|
|
|||
|
def simple_hash(s, start, end):
    """
    Compute a simple hash of the substring ``s[start..end]`` (inclusive).

    The hash is the sum of the code points (``ord``) of the characters in
    the substring.

    :param s: the source string
    :param start: index of the first character of the substring
    :param end: index of the last character of the substring (inclusive)
    :return: the sum of ``ord(c)`` over the selected characters
    """
    assert start <= end

    # ``end`` is inclusive, so the slice has to run one position past it.
    return sum(map(ord, s[start: end + 1]))
|
|||
|
|
|||
|
|
|||
|
def rk(main, pattern):
    """
    Find the first occurrence of ``pattern`` in ``main`` with a rolling hash
    (Rabin-Karp style search).

    The hash of each window of ``len(pattern)`` characters in ``main`` is
    derived from the previous window's hash by subtracting the character
    that leaves the window and adding the one that enters it. A window is
    compared with the pattern character by character only when the hashes
    agree.

    :param main: the text to search in
    :param pattern: the string to search for
    :return: index of the first match in ``main``, or -1 if there is none;
        an empty pattern matches at index 0
    """
    n = len(main)
    m = len(pattern)

    # An empty pattern matches at the very start, as str.find does. Without
    # this guard simple_hash would be called with end < start and trip its
    # assertion.
    if m == 0:
        return 0

    # The pattern is at least as long as the text: only an exact match fits.
    if n <= m:
        return 0 if pattern == main else -1

    # Hash of the pattern and of the first window of the text.
    hash_p = simple_hash(pattern, 0, m - 1)
    window_hash = simple_hash(main, 0, m - 1)

    last_start = n - m
    for i in range(last_start + 1):
        # Different substrings can share a hash, so a hash hit is confirmed
        # by comparing the actual characters.
        if window_hash == hash_p and pattern == main[i:i + m]:
            return i
        if i < last_start:
            # Slide the window one position to the right: drop main[i],
            # take in main[i + m].
            window_hash += simple_hash(main, i + m, i + m) - simple_hash(main, i, i)
    return -1
|
|||
|
|
|||
|
|
|||
|
if __name__ == '__main__':
    # Timing comparison: the text is 10000 'a' characters and the pattern is
    # 200 'a' characters followed by 'b', so the pattern never occurs and
    # brute force compares a long common prefix at every start position.
    m_str, p_str = 'a' * 10000, 'a' * 200 + 'b'

    print('--- time consume ---')
    t = time()
    bf_result = bf(m_str, p_str)
    print('[bf] result:', bf_result)
    print('[bf] time cost: {0:.5}s'.format(time() - t))

    t = time()
    rk_result = rk(m_str, p_str)
    print('[rk] result:', rk_result)
    print('[rk] time cost: {0:.5}s'.format(time() - t))

    # Functional check: both searches are run on a text that contains the
    # pattern.
    print('')
    print('--- search ---')
    m_str, p_str = 'thequickbrownfoxjumpsoverthelazydog', 'jump'
    bf_result = bf(m_str, p_str)
    print('[bf] result:', bf_result)
    rk_result = rk(m_str, p_str)
    print('[rk] result:', rk_result)
|