功能
对输入的文字进行敏感词过滤。
算法原型
前缀树(字典树)
字符串:xwabfabcff;敏感词:adc、bf、be
1.首先用敏感词建立一个字典树
2.三个指针:遍历指针,树指针,发现指针

以上是原型
整合到项目中的实现
@Service
public class SensitiveService implements InitializingBean{
public final static Logger logger = LoggerFactory.getLogger(SensitiveService.class);
/**
 * Loads the sensitive-word list from the classpath resource
 * {@code SensitiveWords.txt} and adds every (trimmed) line to the trie
 * through {@code addWord}.
 *
 * Failures are logged and swallowed, as before: the method returns normally
 * and leaves the trie with whatever words were added before the failure.
 */
@Override
public void afterPropertiesSet() throws Exception {
    InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("SensitiveWords.txt");
    if (is == null) {
        // getResourceAsStream returns null when the resource is not on the classpath.
        logger.error("读取敏感词文件失败");
        return;
    }
    // try-with-resources closes the reader (and the wrapped stream) on every path.
    // The charset is explicit so decoding does not depend on the platform default.
    // NOTE(review): assumes SensitiveWords.txt is stored as UTF-8 - confirm.
    try (BufferedReader bufferedReader = new BufferedReader(
            new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8))) {
        String lineText;
        while ((lineText = bufferedReader.readLine()) != null) {
            // insert each word from the file into the trie
            addWord(lineText.trim());
        }
    } catch (Exception e) {
        // keep the cause in the log so the failure can be diagnosed
        logger.error("读取敏感词文件失败", e);
    }
}
// Root of the keyword trie; it carries no character itself.
private TrieNode root = new TrieNode();

/**
 * Adds one keyword to the trie.
 *
 * Characters for which {@code isSymbol} returns true are skipped, so they
 * do not become part of the stored keyword. The node reached by the last
 * stored character is marked as a keyword end.
 *
 * @param lineText the keyword to add; a word that has no storable
 *                 characters (empty or symbols only) adds nothing
 */
private void addWord(String lineText){
    TrieNode temp = root;
    for (int i = 0; i < lineText.length(); ++i) {
        char c = lineText.charAt(i);
        if (isSymbol(c)) {
            continue;
        }
        TrieNode node = temp.getSubNode(c);
        if (node == null) {
            node = new TrieNode();
            temp.addSubNode(c, node);
        }
        temp = node;
    }
    // Mark the end after the loop rather than at index length-1: if the word
    // ends with a skipped symbol, the last index is never reached as a stored
    // character and the keyword would otherwise never be flagged.
    // The root is never flagged, so an empty word cannot match everything.
    if (temp != root) {
        temp.setKeyWordEnd(true);
    }
}
/**
*
*/
private class TrieNode{
//是不是关键词的结尾
private boolean end = false;
//当前节点下的所有子节点
private Map<Character,TrieNode> subNodes= new HashMap<Character,TrieNode>();
//
public void addSubNode(Character key,TrieNode trieNode){
subNodes.put(key,trieNode);
}
TrieNode getSubNode(Character key){
return subNodes.get(key);
}
boolean isKeyWordEnd(){
return end;
}
void setKeyWordEnd(Boolean end){
this.end = end;
}
}
/**
 * Tells whether {@code c} is treated as a symbol, i.e. a character that is
 * neither an ASCII letter/digit nor inside the range 0x2E80..0x9FFF.
 *
 * @param c the character to classify
 * @return {@code true} if the character is a symbol, {@code false} otherwise
 */
public boolean isSymbol(char c){
    if (CharUtils.isAsciiAlphanumeric(c)){
        return false;
    }
    // 0x2E80..0x9FFF is the range this class treats as East Asian text
    final int codeUnit = c;
    final boolean eastAsian = codeUnit >= 0x2E80 && codeUnit <= 0x9FFF;
    return !eastAsian;
}
/**
 * Replaces every sensitive keyword found in {@code text} with a fixed
 * replacement message.
 *
 * The scan uses three cursors: {@code begin} (start of the current match
 * candidate), {@code position} (character being examined) and
 * {@code tempNode} (current trie node). Characters for which
 * {@code isSymbol} returns true are skipped while a match is in progress,
 * so a keyword interrupted by symbols is still detected. A match is
 * reported as soon as the first keyword end is reached.
 *
 * @param text the text to filter
 * @return {@code text} itself when {@code StringUtils.isBlank(text)} is
 *         true, otherwise the text with each detected keyword replaced
 */
public String filter(String text){
    if (StringUtils.isBlank(text)){
        return text;
    }
    String replacement="这是敏感词,被屏蔽了,对不起了老哥";
    TrieNode tempNode = root;
    int begin = 0;
    int position = 0;
    StringBuilder result = new StringBuilder();
    // Loop on begin, not position: every start index must get its own match
    // attempt, including those inside a partial match that hits the end of text.
    while (begin < text.length()){
        if (position >= text.length()){
            // The text ended in the middle of a partial match, so the candidate
            // starting at begin is not a keyword. Emit its first character and
            // retry from the next one; a shorter keyword may start later.
            result.append(text.charAt(begin));
            position = begin + 1;
            begin = position;
            tempNode = root;
            continue;
        }
        char c = text.charAt(position);
        if (isSymbol(c)){
            if (tempNode == root){
                // no match in progress: the symbol is ordinary output
                result.append(c);
                ++begin;
            }
            // inside a match the symbol is skipped without breaking the match
            ++position;
            continue;
        }
        tempNode = tempNode.getSubNode(c);
        if (tempNode == null){
            // mismatch: the candidate starting at begin is not a keyword
            result.append(text.charAt(begin));
            position = begin + 1;
            begin = position;
            tempNode = root;
        }else if (tempNode.isKeyWordEnd()){
            // keyword found: replace everything from begin up to position
            result.append(replacement);
            position = position + 1;
            begin = position;
            tempNode = root;
        }else {
            ++position;
        }
    }
    return result.toString();
}