[Fact] public void IssuesTest_17() { var illegalWordsSearch = new IllegalWordsSearch(); string s = "中国|zg人|abc"; illegalWordsSearch.SetKeywords(s.Split('|')); var str = illegalWordsSearch.Replace("我是中美国人厉害中国完美abcddb好的", '*');
private static IllegalWordsSearch _search;

/// <summary>
/// Returns the shared search instance for the local sensitive-word library, building it on
/// first use. When the source files have been modified, the serialized cache is recreated.
/// </summary>
/// <remarks>
/// The info file is expected to hold two '|'-separated timestamps: the last-write time of the
/// keyword file followed by that of the URL file. The search is rebuilt through
/// <see cref="CreateIllegalWordsSearch"/> when the info file is missing or does not contain both
/// timestamps, when either timestamp differs from the file on disk, or when the serialized cache
/// file is missing. Otherwise the search is loaded from the serialized cache file.
/// </remarks>
/// <returns>The cached search instance.</returns>
public static IllegalWordsSearch GetIllegalWordsSearch()
{
    if (_search != null)
    {
        return _search;
    }

    const string stampFormat = "yyyy-MM-dd HH:mm:ss";
    string infoFile = Path.GetFullPath(InfoPath);
    string bitFile = Path.GetFullPath(BitPath);

    bool cacheUsable = false;
    // NOTE(review): Load presumably reads the file at the given path, so a missing cache file
    // is treated the same as a stale one — confirm against IllegalWordsSearch.Load.
    if (File.Exists(infoFile) && File.Exists(bitFile))
    {
        string[] stamps = File.ReadAllText(infoFile).Split('|');

        // A truncated or empty info file yields fewer than two parts; treat it as stale
        // instead of indexing past the end of the array.
        if (stamps.Length >= 2)
        {
            string keywordsStamp = new FileInfo(Path.GetFullPath(KeywordsPath)).LastWriteTime.ToString(stampFormat);
            string urlsStamp = new FileInfo(Path.GetFullPath(UrlsPath)).LastWriteTime.ToString(stampFormat);
            cacheUsable = keywordsStamp == stamps[0] && urlsStamp == stamps[1];
        }
    }

    if (cacheUsable)
    {
        var loaded = new IllegalWordsSearch();
        loaded.Load(bitFile);
        _search = loaded;
    }
    else
    {
        _search = CreateIllegalWordsSearch();
    }

    return _search;
}
/// <summary>
/// Builds a new search instance from the keyword file and the URL file, saves it to the
/// serialized cache file, and writes both source files' last-write times to the info file.
/// </summary>
/// <returns>The newly built search instance.</returns>
private static IllegalWordsSearch CreateIllegalWordsSearch()
{
    string keywordsFile = Path.GetFullPath(KeywordsPath);
    string urlsFile = Path.GetFullPath(UrlsPath);

    // Keyword lines first, then URL lines; both files are read as UTF-8.
    string[][] sources =
    {
        File.ReadAllLines(keywordsFile, Encoding.UTF8),
        File.ReadAllLines(urlsFile, Encoding.UTF8)
    };

    var entries = new List<string>();
    foreach (string[] lines in sources)
    {
        foreach (string line in lines)
        {
            // Every line is added after trimming, including lines that trim to empty.
            entries.Add(line.Trim());
        }
    }

    var result = new IllegalWordsSearch();
    result.SetKeywords(entries);
    result.Save(Path.GetFullPath(BitPath));

    // Record "<keywords time>|<urls time>" so a later run can compare it with the files on disk.
    const string stampFormat = "yyyy-MM-dd HH:mm:ss";
    string stamps = string.Join(
        "|",
        new FileInfo(keywordsFile).LastWriteTime.ToString(stampFormat),
        new FileInfo(urlsFile).LastWriteTime.ToString(stampFormat));
    File.WriteAllText(Path.GetFullPath(InfoPath), stamps);

    return result;
}
}
2. 循环使用Replace
方案:通过维护敏感库,循环调用 Replace。代码是大佬分享给我的,我稍微改成了从文件中获取敏感字。
public static class StopWords {
static readonly ConcurrentDictionary<string, bool> FunNlpDataSensitive = new ConcurrentDictionary<string, bool>(); static readonly ConcurrentDictionary<int, string> ReplaceNewValue = new ConcurrentDictionary<int, string>();