改进:改成匹配url是否为以某个结尾的,至于非法的url就让Jsoup.connect(url)把异常抛弃
//启动该正则匹配特别的慢
// public static String regex = "^([hH][tT]{2}[pP]:/*|[hH][tT]{2}[pP][sS]:/*|[fF][tT][pP]:/*)(([A-Za-z0-9-~]+).)+([A-Za-z0-9-~\\/])+(\\?{0,1}(([A-Za-z0-9-~]+\\={0,1})([A-Za-z0-9-~]*)\\&{0,1})*)$";public static final Pattern FILTERS = Pattern.compile(".*(\\.(css|js|bmp|gif|jpe?g" + "|png|tiff?|mid|mp2|mp3|mp4"+ "|wav|avi|mov|mpeg|ram|m4v|pdf" + "|rm|smil|wmv|swf|wma|zip|rar|gz))$");public static boolean isMatchURL(String url) {
// Pattern pattern = Pattern.compile(regex);if(FILTERS.matcher(url).matches()) {return false;}
// if (pattern.matcher(url).matches()) {
// return true;
// } return true;}