5 changed files with 338 additions and 2 deletions
@ -0,0 +1,13 @@ |
|||||
|
package com.kms; |
||||
|
|
||||
|
|
||||
|
import cn.hutool.core.util.EscapeUtil; |
||||
|
|
||||
|
public class Test { |
||||
|
public static void main(String[] args) { |
||||
|
String html = "<script>alert(1);</script>"; |
||||
|
System.out.println(EscapeUtil.escape(html)); |
||||
|
System.out.println(EscapeUtil.escape(html)); |
||||
|
System.out.println(EscapeUtil.unescape(html)); |
||||
|
} |
||||
|
} |
@ -0,0 +1,112 @@ |
|||||
|
package com.jianwei.common.utils; |
||||
|
|
||||
|
import java.util.regex.Pattern; |
||||
|
|
||||
|
import java.util.regex.Matcher; |
||||
|
/**
 * Best-effort XSS filter for untrusted text.
 *
 * <p>{@link #filter(String)} first strips well-known attack fragments (script
 * elements, inline event handlers, CSS expressions, dangerous URL schemes) and
 * then HTML-escapes whatever is left. The stripping stage is a blacklist and is
 * only defence in depth; the final escaping step is what neutralises markup
 * characters in the returned text.
 */
public final class XssFilterUtils {

    private static final int REGEX_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;

    /** A whole {@code <script>...</script>} element, or a stray opening/closing script tag. */
    private static final Pattern SCRIPT_PATTERN = Pattern.compile(
            "<\\s*script\\b[^>]*>.*?<\\s*/\\s*script\\s*>|<\\s*/?\\s*script\\b[^>]*>",
            REGEX_FLAGS | Pattern.DOTALL);

    /**
     * An inline event handler including its value: double-quoted, single-quoted or
     * unquoted (up to whitespace or {@code >}). The leading word boundary keeps
     * ordinary words that merely contain "on" (e.g. {@code conference=}) intact.
     */
    private static final Pattern EVENT_HANDLER_PATTERN = Pattern.compile(
            "\\bon\\w+\\s*=\\s*(?:\"[^\"]*\"|'[^']*'|[^\\s>]*)", REGEX_FLAGS);

    /** URL schemes that are removed outright. */
    private static final Pattern DANGEROUS_PROTOCOL_PATTERN = Pattern.compile(
            "\\b(?:javascript|vbscript|data|file|about|chrome|moz-extension):", REGEX_FLAGS);

    /** CSS {@code expression(} and {@code url(...javascript:} prefixes. */
    private static final Pattern CSS_EXPRESSION_PATTERN = Pattern.compile(
            "expression\\s*\\(|url\\s*\\([^)]*?javascript:", REGEX_FLAGS);

    private XssFilterUtils() {
        throw new IllegalStateException("Utility class");
    }

    /**
     * Strips known XSS fragments from {@code input} and HTML-escapes the remainder.
     *
     * @param input untrusted text; may be {@code null}
     * @return {@code input} itself when it is {@code null} or empty, otherwise the
     *         trimmed, stripped and HTML-escaped text
     */
    public static String filter(String input) {
        if (input == null || input.isEmpty()) {
            return input;
        }

        String processed = input.trim();
        String previous;
        // Removing one fragment can join its neighbours into a new one
        // (e.g. "java<script>script:"), so repeat until nothing changes.
        // Every stage only deletes text, so the loop always terminates.
        do {
            previous = processed;
            processed = sanitizeHtmlTags(processed);
            processed = sanitizeEventHandlers(processed);
            // CSS runs before the scheme stage so "url(javascript:" is still visible to it.
            processed = sanitizeCssExpressions(processed);
            processed = sanitizeDangerousProtocols(processed);
        } while (!processed.equals(previous));

        return escapeHtmlCharacters(processed);
    }

    /** Removes script elements and stray script tags. */
    private static String sanitizeHtmlTags(String input) {
        return SCRIPT_PATTERN.matcher(input).replaceAll("");
    }

    /** Removes inline {@code on*=} handlers together with their values. */
    private static String sanitizeEventHandlers(String input) {
        return EVENT_HANDLER_PATTERN.matcher(input).replaceAll("");
    }

    /**
     * Removes dangerous URL scheme prefixes. Rewriting them to {@code scheme://}
     * would leave the scheme in place, so the prefix is deleted instead.
     */
    private static String sanitizeDangerousProtocols(String input) {
        return DANGEROUS_PROTOCOL_PATTERN.matcher(input).replaceAll("");
    }

    /** Removes CSS expression / javascript-url prefixes. */
    private static String sanitizeCssExpressions(String input) {
        return CSS_EXPRESSION_PATTERN.matcher(input).replaceAll("");
    }

    /** Replaces {@code & " < > '} and the backtick with HTML entities in a single pass. */
    private static String escapeHtmlCharacters(String input) {
        StringBuilder escaped = new StringBuilder(input.length() + 16);
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '\'':
                    escaped.append("&#39;");
                    break;
                case '`':
                    escaped.append("&#96;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /**
     * Manual smoke test: prints each sample payload next to its filtered form.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Sample payloads
        String[] testCases = {
            "<script>alert('xss')</script>",
            "<a href='javascript:alert(1)'>click me</a>",
            "<div onmouseover=alert(2)>hover me</div>",
            "<!--<img src=x onerror=alert(3)>-->",
            "expression(alert(4))",
            "url(javascript:alert(5))"
        };

        for (String testCase : testCases) {
            System.out.println("原始输入: " + testCase);
            System.out.println("过滤后: " + filter(testCase));
            System.out.println("------------------------");
        }
    }
}
@ -0,0 +1,208 @@ |
|||||
|
package com.jianwei.common.utils; |
||||
|
|
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.safety.Safelist; |
||||
|
|
||||
|
import java.util.regex.Pattern; |
||||
|
|
||||
|
public class XssUtils { |
||||
|
private static final Pattern[] XSS_PATTERNS = { |
||||
|
// 避免 script 标签
|
||||
|
Pattern.compile("<script>(.*?)</script>", Pattern.CASE_INSENSITIVE), |
||||
|
Pattern.compile("<iframe>(.*?)</iframe>", Pattern.CASE_INSENSITIVE), |
||||
|
Pattern.compile("<button>(.*?)</button>", Pattern.CASE_INSENSITIVE), |
||||
|
Pattern.compile("<button/>", Pattern.CASE_INSENSITIVE), |
||||
|
// 避免 src='...' 形式的表达式
|
||||
|
Pattern.compile("src[\r\n]*=[\r\n]*\\\'(.*?)\\\'", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL), |
||||
|
Pattern.compile("src[\r\n]*=[\r\n]*\\\"(.*?)\\\"", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL), |
||||
|
// 避免单独的 script 表达式
|
||||
|
Pattern.compile("</script>", Pattern.CASE_INSENSITIVE), |
||||
|
Pattern.compile("</iframe>", Pattern.CASE_INSENSITIVE), |
||||
|
Pattern.compile( |
||||
|
"on\\w+\\s*=\\s*['\"]?.*?['\"]?", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE), |
||||
|
|
||||
|
Pattern.compile( |
||||
|
"expression\\s*\\(|url\\s*\\([^\\)]*?javascript:", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE), |
||||
|
Pattern.compile( |
||||
|
"(javascript|vbscript|data|file|about|chrome|moz-extension):", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE), |
||||
|
|
||||
|
Pattern.compile("<script(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL), |
||||
|
Pattern.compile("<button(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL), |
||||
|
Pattern.compile("<iframe(.*?)>", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL), |
||||
|
// 避免 eval(...) 表达式
|
||||
|
Pattern.compile("eval\\((.*?)\\)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL), |
||||
|
// 避免 expression(...) 表达式
|
||||
|
Pattern.compile("expression\\((.*?)\\)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL), |
||||
|
// 避免 javascript:... 表达式
|
||||
|
Pattern.compile("javascript:", Pattern.CASE_INSENSITIVE), |
||||
|
// 避免 vbscript:... 表达式
|
||||
|
Pattern.compile("vbscript:", Pattern.CASE_INSENSITIVE), |
||||
|
// 避免 onload= 表达式
|
||||
|
Pattern.compile("onload(.*?)=", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL) |
||||
|
}; |
||||
|
|
||||
|
public static String stripXss(String value) { |
||||
|
if (value != null) { |
||||
|
String cleanValue = value; |
||||
|
for (Pattern pattern : XSS_PATTERNS) { |
||||
|
cleanValue = pattern.matcher(cleanValue).replaceAll(""); |
||||
|
} |
||||
|
return cleanValue; |
||||
|
} |
||||
|
return null; |
||||
|
} |
||||
|
|
||||
|
public static String xssContent(String content) { |
||||
|
Safelist whitelist = new Safelist(); |
||||
|
//先加标签
|
||||
|
|
||||
|
whitelist.addTags("a"); |
||||
|
whitelist.addTags("abbr"); |
||||
|
whitelist.addTags("address"); |
||||
|
whitelist.addTags("area"); |
||||
|
whitelist.addTags("article"); |
||||
|
whitelist.addTags("aside"); |
||||
|
whitelist.addTags("b"); |
||||
|
whitelist.addTags("bdi"); |
||||
|
whitelist.addTags("bdo"); |
||||
|
whitelist.addTags("big"); |
||||
|
whitelist.addTags("blockquote"); |
||||
|
whitelist.addTags("br"); |
||||
|
whitelist.addTags("caption"); |
||||
|
whitelist.addTags("center"); |
||||
|
whitelist.addTags("cite"); |
||||
|
whitelist.addTags("code"); |
||||
|
whitelist.addTags("col"); |
||||
|
whitelist.addTags("colgroup"); |
||||
|
whitelist.addTags("dd"); |
||||
|
whitelist.addTags("del"); |
||||
|
whitelist.addTags("details"); |
||||
|
whitelist.addTags("div"); |
||||
|
whitelist.addTags("dl"); |
||||
|
whitelist.addTags("dt"); |
||||
|
whitelist.addTags("em"); |
||||
|
whitelist.addTags("figcaption"); |
||||
|
whitelist.addTags("figure"); |
||||
|
whitelist.addTags("font"); |
||||
|
whitelist.addTags("footer"); |
||||
|
whitelist.addTags("h1"); |
||||
|
whitelist.addTags("h2"); |
||||
|
whitelist.addTags("h3"); |
||||
|
whitelist.addTags("h4"); |
||||
|
whitelist.addTags("h5"); |
||||
|
whitelist.addTags("h6"); |
||||
|
whitelist.addTags("header"); |
||||
|
whitelist.addTags("hr"); |
||||
|
whitelist.addTags("i"); |
||||
|
whitelist.addTags("img"); |
||||
|
whitelist.addTags("ins"); |
||||
|
whitelist.addTags("li"); |
||||
|
whitelist.addTags("mark"); |
||||
|
whitelist.addTags("nav"); |
||||
|
whitelist.addTags("ol"); |
||||
|
whitelist.addTags("p"); |
||||
|
whitelist.addTags("pre"); |
||||
|
whitelist.addTags("s"); |
||||
|
whitelist.addTags("section"); |
||||
|
whitelist.addTags("small"); |
||||
|
whitelist.addTags("span"); |
||||
|
whitelist.addTags("sub"); |
||||
|
whitelist.addTags("summary"); |
||||
|
whitelist.addTags("sup"); |
||||
|
whitelist.addTags("strong"); |
||||
|
whitelist.addTags("strike"); |
||||
|
whitelist.addTags("table"); |
||||
|
whitelist.addTags("tbody"); |
||||
|
whitelist.addTags("td"); |
||||
|
whitelist.addTags("tfoot"); |
||||
|
whitelist.addTags("th"); |
||||
|
whitelist.addTags("thead"); |
||||
|
whitelist.addTags("tr"); |
||||
|
whitelist.addTags("tt"); |
||||
|
whitelist.addTags("u"); |
||||
|
whitelist.addTags("ul"); |
||||
|
//再加attributes
|
||||
|
whitelist.addAttributes("a","target", "title","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("abbr","title","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("address","title","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("area","shape", "coords", "alt","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("article","title","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("aside","title","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("b","title","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("bdi","dir","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("bdo","dir","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("big","dir","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("blockquote","cite","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("br","title","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("caption","title","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("center","title","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("cite","title","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("code","title","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("col","align", "valign", "span", "width","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("colgroup","align", "valign", "span", "width","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("dd","title","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("del","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("details","open","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("div","open","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("dl","open","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("dt","open","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("em","open","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("figcaption","open","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("figure","open","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("font","color", "size", "face","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("footer","color", "size", "face","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("h1","color", "size", "face","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("h2","color", "size", "face","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("h3","color", "size", "face","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("h4","color", "size", "face","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("h5","color", "size", "face","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("h6","color", "size", "face","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("header","color", "size", "face","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("hr","color", "size", "face","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("i","color", "size", "face","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("img","src", "alt", "title", "width", "height","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("ins","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("li","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("ins","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("mark","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("nav","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("ol","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("p","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("pre","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("s","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("section","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("small","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("span","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("sub","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("summary","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("sup","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("strong","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("strike","datetime","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("table","width", "border", "align", "valign","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("tbody","align", "valign","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("td","width", "rowspan", "colspan", "align", "valign","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("tfoot","align", "valign","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("th","width", "rowspan", "colspan", "align", "valign","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("thead","align", "valign","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("tr","rowspan", "align", "valign","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("tt","style","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("u","style","data-row","data-cell","data-rowspan","style","class"); |
||||
|
whitelist.addAttributes("ul","style","data-row","data-cell","data-rowspan","style","class"); |
||||
|
return Jsoup.clean(content,"" ,whitelist,new Document.OutputSettings().prettyPrint(false)); |
||||
|
} |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
String testCase = "<div style='color:red;' onclike=''><button><p>Hello <a href='javascript:alert(1)' target='_blank'>Link</a></p>" + |
||||
|
"<img src='x onerror=alert(2)' alt='test' width='100' height='200'></div>"; |
||||
|
|
||||
|
System.out.println("原始输入: " + testCase); |
||||
|
System.out.println("过滤后: " + xssContent(testCase)); |
||||
|
/* 输出示例: |
||||
|
<div><p>Hello <a target="_blank" href="safe-javascript:alert(1)">Link</a></p> |
||||
|
<img alt="test" width="100" height="200" src="x onerror=alert(2)"></div> |
||||
|
(实际会进一步过滤非法属性,此处简化展示逻辑) |
||||
|
*/ |
||||
|
} |
||||
|
|
||||
|
} |
Loading…
Reference in new issue