From 3fd9a807194791012f244ee22bbd02f5d6e2965d Mon Sep 17 00:00:00 2001 From: gz1301 <109694228@qq.com> Date: Wed, 12 Dec 2018 10:15:14 +0800 Subject: [PATCH] bf and rk in python --- python/32_bf_rk/bf_rk.py | 93 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 python/32_bf_rk/bf_rk.py diff --git a/python/32_bf_rk/bf_rk.py b/python/32_bf_rk/bf_rk.py new file mode 100644 index 0000000..0cd2399 --- /dev/null +++ b/python/32_bf_rk/bf_rk.py @@ -0,0 +1,93 @@ +#!/usr/bin/python +# -*- coding: UTF-8 -*- + +from time import time + + +def bf(main, pattern): + """ + 字符串匹配,bf暴搜 + :param main: 主串 + :param pattern: 模式串 + :return: + """ + n = len(main) + m = len(pattern) + + if n <= m: + return 0 if pattern == main else -1 + + for i in range(n-m+1): + for j in range(m): + if main[i+j] == pattern[j]: + if j == m-1: + return i + else: + continue + else: + break + return -1 + + +def simple_hash(s, start, end): + """ + 计算子串的哈希值 + 每个字符取acs-ii码后求和 + :param s: + :param start: + :param end: + :return: + """ + assert start <= end + + ret = 0 + for c in s[start: end+1]: + ret += ord(c) + return ret + + +def rk(main, pattern): + n = len(main) + m = len(pattern) + + if n <= m: + return 0 if pattern == main else -1 + + # 子串哈希值表 + hash_memo = [None] * (n-m+1) + hash_memo[0] = simple_hash(main, 0, m-1) + for i in range(1, n-m+1): + hash_memo[i] = hash_memo[i-1] - simple_hash(main, i-1, i-1) + simple_hash(main, i+m-1, i+m-1) + + # 模式串哈希值 + hash_p = simple_hash(pattern, 0, m-1) + + for i, h in enumerate(hash_memo): + # 可能存在哈希冲突 + if h == hash_p: + if pattern == main[i:i+m]: + return i + else: + continue + return -1 + + +if __name__ == '__main__': + m_str = 'a'*10000 + p_str = 'a'*200+'b' + + print('--- time consume ---') + t = time() + print('[bf] result:', bf(m_str, p_str)) + print('[bf] time cost: {0:.5}s'.format(time()-t)) + + t = time() + print('[rk] result:', rk(m_str, p_str)) + print('[rk] time cost: {0:.5}s'.format(time()-t)) + + print('') + print('--- search ---') + m_str = 'thequickbrownfoxjumpsoverthelazydog' + p_str = 'jump' + print('[bf] result:', bf(m_str, p_str)) + print('[rk] result:', rk(m_str, p_str))