algo/python/32_bf_rk/bf_rk.py
2018-12-12 10:15:14 +08:00

94 lines
2.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/python
# -*- coding: UTF-8 -*-
from time import time
def bf(main, pattern):
    """
    Brute-force (BF) string matching.

    Tries every alignment of ``pattern`` inside ``main`` from left to right
    and compares the two character by character.

    :param main: the text to search in
    :param pattern: the pattern to search for
    :return: index of the first occurrence of ``pattern`` in ``main``, or -1
        if it does not occur; an empty pattern matches at index 0
    """
    n = len(main)
    m = len(pattern)
    if m == 0:
        # An empty pattern occurs at the start of any text (same convention
        # as str.find); without this guard the inner loop never runs and the
        # search would wrongly report -1.
        return 0
    if n <= m:
        # The pattern is at least as long as the text: only an exact match
        # can succeed.
        return 0 if pattern == main else -1
    for i in range(n - m + 1):
        for j in range(m):
            if main[i + j] != pattern[j]:
                break
        else:
            # The inner loop finished without a mismatch: full match at i.
            return i
    return -1
def simple_hash(s, start, end):
    """
    Compute the hash of a substring.

    The hash is the sum of the code points (``ord``) of every character in
    ``s[start]`` .. ``s[end]``, both ends inclusive.

    :param s: the string to hash a slice of
    :param start: index of the first character to include
    :param end: index of the last character to include (inclusive)
    :return: the sum of ``ord(c)`` over the selected characters
    """
    assert start <= end
    return sum(ord(ch) for ch in s[start:end + 1])
def rk(main, pattern):
    """
    Rabin-Karp (RK) string matching with an additive rolling hash.

    The hash of a window is the sum of the code points of its characters.
    Moving the window one position to the right only requires subtracting
    the character that leaves and adding the character that enters, so each
    window hash is obtained in constant time.

    :param main: the text to search in
    :param pattern: the pattern to search for
    :return: index of the first occurrence of ``pattern`` in ``main``, or -1
        if it does not occur; an empty pattern matches at index 0
    """
    n = len(main)
    m = len(pattern)
    if m == 0:
        # An empty pattern occurs at the start of any text (same convention
        # as str.find). Hashing an empty window is meaningless, so handle it
        # before any hash is computed.
        return 0
    if n <= m:
        # The pattern is at least as long as the text: only an exact match
        # can succeed.
        return 0 if pattern == main else -1

    # Hash of the pattern and of the first window main[0:m].
    hash_p = sum(map(ord, pattern))
    window_hash = sum(map(ord, main[:m]))

    last_start = n - m
    for i in range(last_start + 1):
        # Equal hashes may still be a collision (e.g. "ab" vs "ba"), so
        # confirm with a real comparison.
        if window_hash == hash_p and main[i:i + m] == pattern:
            return i
        if i < last_start:
            # Roll the window: drop main[i], take in main[i + m].
            window_hash += ord(main[i + m]) - ord(main[i])
    return -1
if __name__ == '__main__':
    # Timing demo: the pattern never occurs in the text, and every alignment
    # matches 200 'a' characters before failing on the final 'b'.
    text = 'a' * 10000
    needle = 'a' * 200 + 'b'

    print('--- time consume ---')

    started = time()
    bf_index = bf(text, needle)
    print('[bf] result:', bf_index)
    bf_elapsed = time() - started
    print('[bf] time cost: {0:.5}s'.format(bf_elapsed))

    started = time()
    rk_index = rk(text, needle)
    print('[rk] result:', rk_index)
    rk_elapsed = time() - started
    print('[rk] time cost: {0:.5}s'.format(rk_elapsed))

    print('')

    # Functional demo: both algorithms search the same text for one word.
    print('--- search ---')
    text = 'thequickbrownfoxjumpsoverthelazydog'
    needle = 'jump'
    print('[bf] result:', bf(text, needle))
    print('[rk] result:', rk(text, needle))