algorithms-go/other/dfa/main.go
2021-04-07 14:27:14 +08:00

93 lines
2.5 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"fmt"
"strings"
)
var sensitiveWord = make(map[string]interface{})
var Set = make(map[string]interface{})
const InvalidWords = " ,~,!,@,#,$,%,^,&,*,(,),_,-,+,=,?,<,>,.,—,,。,/,\\,|,《,》,,;,:,,',,,“,"
var InvalidWord = make(map[string]interface{}) //无效词汇,不参与敏感词汇判断直接忽略
//生成违禁词集合
func AddSensitiveToMap(set map[string]interface{}) {
for key := range set {
str := []rune(key)
nowMap := sensitiveWord
for i := 0; i < len(str); i++ {
if _, ok := nowMap[string(str[i])]; !ok { //如果该key不存在
thisMap := make(map[string]interface{})
thisMap["isEnd"] = false
nowMap[string(str[i])] = thisMap
nowMap = thisMap
} else {
nowMap = nowMap[string(str[i])].(map[string]interface{})
}
if i == len(str)-1 {
nowMap["isEnd"] = true
}
}
}
}
//敏感词汇转换为*
func ChangeSensitiveWords(txt string, sensitive map[string]interface{}) (word string) {
str := []rune(txt)
nowMap := sensitive
start := -1
tag := -1
for i := 0; i < len(str); i++ {
if _, ok := InvalidWord[(string(str[i]))]; ok {
continue //如果是无效词汇直接跳过
}
if thisMap, ok := nowMap[string(str[i])].(map[string]interface{}); ok {
//记录敏感词第一个文字的位置
tag++
if tag == 0 {
start = i
}
//判断是否为敏感词的最后一个文字
if isEnd, _ := thisMap["isEnd"].(bool); isEnd {
//将敏感词的第一个文字到最后一个文字全部替换为“*”
for y := start; y < i+1; y++ {
str[y] = 42
}
//重置标志数据
nowMap = sensitive
start = -1
tag = -1
} else { //不是最后一个则将其包含的map赋值给nowMap
nowMap = nowMap[string(str[i])].(map[string]interface{})
}
} else { //如果敏感词不是全匹配,则终止此敏感词查找。从开始位置的第二个文字继续判断
if start != -1 {
i = start + 1
}
//重置标志参数
nowMap = sensitive
start = -1
tag = -1
}
}
return string(str)
}
func main() {
words := strings.Split(InvalidWords, ",")
for _, v := range words {
InvalidWord[v] = nil
}
Set["你妈逼的"] = nil
Set["你妈"] = nil
Set["狗日"] = nil
Set["流弊"] = nil
AddSensitiveToMap(Set)
text := "文明用语你&* 妈, 逼的你这个狗 日的,怎么这么傻啊。我也是服了,我日,这些话我都说不出口,流弊好不好"
fmt.Println(ChangeSensitiveWords(text, sensitiveWord))
}