如下文件内容:
这里有特殊字符:\n 、\t 、\u4e0a 、\/
我要做的事,就是恢复其特殊字符的作用(而不是打印被转义后的效果)
直观的看,很容易:直接替换不就行了
// Naive attempt (has no visible effect): replaceAll treats its first argument as a
// regular expression, so "\\n" compiles to a pattern that matches a real line-feed
// character rather than a backslash followed by the letter n.
line = line.replaceAll("\\n", "\r\n" );
line = line.replaceAll("\\t", "\t" );
line = line.replaceAll("\\/", "/" );
但是,这是错误的:新生成的文件没有任何改变。
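这里有个问题以前没弄清楚:文本文件中的 \n 读入后,在字符串里就是“反斜杠”和“n”两个字符;而 replaceAll 的第一个参数是正则表达式,Java 源码中的 "\\n" 编译后得到的正则是 \n,它匹配的是真正的换行符,而不是这两个字符。要匹配字面上的反斜杠,正则必须写成 \\n,对应的 Java 字符串字面量就是 "\\\\n"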
因而正确的替换方法为:
// A literal backslash is written as two backslashes in regex syntax, and each of those
// must be escaped again inside a Java string literal -- hence four backslashes.
line = line.replaceAll("\\\\n", "\r\n" );
line = line.replaceAll("\\\\t", "\t" );
line = line.replaceAll("\\\\/", "/" );
接下来就是处理 Unicode码,将其还原
来源:http://www.cnblogs.com/yuxuan/archive/2011/08/02/2124904.html
/**
 * Converts a string consisting solely of backslash-u escapes (a backslash, the
 * letter u and a hexadecimal code unit) into the characters those escapes encode.
 *
 * The input must start with an escape; every escape after the first one is located
 * by searching for the next backslash-u pair.
 *
 * @param dataStr the escaped source string, e.g. one or more consecutive escapes
 * @return the decoded text, or an empty string when dataStr is empty
 * @throws NumberFormatException if the text following a backslash-u pair is not
 *         a valid hexadecimal number
 */
static String decodeUnicode( final String dataStr ) {
    final StringBuilder buffer = new StringBuilder();
    if( dataStr.length() == 0 ) {
        return buffer.toString();
    }
    int start = 0;
    while( start > -1 ) {
        // indexOf takes a plain string, not a regex, so a single escaped backslash
        // is all that is needed to find the next escape.
        final int end = dataStr.indexOf( "\\u", start + 2 );
        // Skip the two-character prefix and take the hex digits up to the next escape.
        final String charStr = ( end == -1 )
                ? dataStr.substring( start + 2 )
                : dataStr.substring( start + 2, end );
        buffer.append( (char) Integer.parseInt( charStr, 16 ) ); // parse as base 16
        start = end; // -1 once the last escape has been consumed
    }
    return buffer.toString();
}
有了 decodeUnicode 方法,接下来只需要用正则表达式找出文本中所有 \uxxxx 形式的转义序列,并逐个转换即可:
/**
 * Replaces every backslash-u escape (backslash, letter u, four hexadecimal digits)
 * found in the text with the character it encodes. Text that does not match this
 * shape, including escapes whose digits are not hexadecimal, is copied unchanged.
 *
 * @param s the text that may contain escapes
 * @return the text with all escapes decoded
 * @throws RuntimeException wrapping any failure raised while matching or decoding
 */
static String replace( String s )
{
    try {
        // Only hex digits are accepted so that every match can be parsed as base 16.
        Pattern regex = Pattern.compile("\\\\u[0-9a-f]{4}", Pattern.CASE_INSENSITIVE);
        Matcher matcher = regex.matcher(s);
        StringBuffer sb = new StringBuffer();
        while (matcher.find()) {
            // The decoded character may itself be '$' or a backslash, both of which are
            // special in a replacement string, so it has to be quoted first.
            String decoded = decodeUnicode( matcher.group() );
            matcher.appendReplacement(sb, Matcher.quoteReplacement( decoded ) );
        }
        matcher.appendTail(sb);
        return sb.toString();
    } catch (Exception ex) {
        // Keep the original failure as the cause so it can still be diagnosed.
        throw new RuntimeException( "Something error.", ex );
    }
}
总的转换代码:
/**
 * Copies a text file line by line into the file opened by buildWriter, turning the
 * escaped sequences backslash-n, backslash-t and backslash-slash into CR LF, a tab
 * and a plain slash, and decoding backslash-u escapes through replace.
 *
 * I/O failures are reported with printStackTrace; both streams are closed in all cases.
 *
 * @param file the source text file
 */
static void readToWrite( File file )
{
    BufferedReader bufReader = null;
    BufferedWriter bufWriter = null;
    try {
        bufReader = new BufferedReader( new FileReader(file) );
        bufWriter = buildWriter( file );
        if( bufWriter == null ) {
            // buildWriter returns null when the target could not be opened;
            // without a writer there is nothing to do (the reader is closed below).
            return;
        }
        String line = null;
        while( (line = bufReader.readLine()) != null )
        {
            // replaceAll takes a regex: four backslashes in the literal match one in the text.
            line = line.replaceAll("\\\\n", "\r\n" );
            line = line.replaceAll("\\\\t", "\t" );
            line = line.replaceAll("\\\\/", "/" );
            line = replace( line );
            bufWriter.write( line );
            bufWriter.newLine();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    finally{
        if( bufReader != null ){
            try {
                bufReader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
            bufReader = null;
        }
        if( bufWriter != null ){
            try {
                bufWriter.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
            bufWriter = null;
        }
    }
}
/**
 * Opens a writer on a sibling of the given file: the output lives in the same
 * (canonical) directory and its name is the source name with "@" inserted before
 * ".txt". When the source name contains no ".txt", "@" is appended instead so that
 * the output can never be the source file itself.
 *
 * @param file the source file the output name is derived from
 * @return a writer on the derived file, or null if it could not be opened
 *         (the failure is reported with printStackTrace)
 */
static BufferedWriter buildWriter( File file ){
    try {
        String name = file.getName().replaceAll("\\.txt", "@\\.txt" );
        if( name.equals( file.getName() ) ) {
            // Nothing was replaced: writing to the same name would truncate the input.
            name = name + "@";
        }
        // Let File resolve the directory instead of splitting on a hard-coded "\\",
        // which only exists in Windows paths.
        File directory = file.getCanonicalFile().getParentFile();
        return new BufferedWriter( new FileWriter( new File( directory, name ) ) );
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}
/**
 * Replaces every backslash-u escape (backslash, letter u, four hexadecimal digits)
 * found in the text with the character it encodes. Text that does not match this
 * shape, including escapes whose digits are not hexadecimal, is copied unchanged.
 *
 * @param s the text that may contain escapes
 * @return the text with all escapes decoded
 * @throws RuntimeException wrapping any failure raised while matching or decoding
 */
static String replace( String s )
{
    try {
        // Only hex digits are accepted so that every match can be parsed as base 16.
        Pattern regex = Pattern.compile("\\\\u[0-9a-f]{4}", Pattern.CASE_INSENSITIVE);
        Matcher matcher = regex.matcher(s);
        StringBuffer sb = new StringBuffer();
        while (matcher.find()) {
            // The decoded character may itself be '$' or a backslash, both of which are
            // special in a replacement string, so it has to be quoted first.
            String decoded = decodeUnicode( matcher.group() );
            matcher.appendReplacement(sb, Matcher.quoteReplacement( decoded ) );
        }
        matcher.appendTail(sb);
        return sb.toString();
    } catch (Exception ex) {
        // Keep the original failure as the cause so it can still be diagnosed.
        throw new RuntimeException( "Something error.", ex );
    }
}
/**
 * Converts a string consisting solely of backslash-u escapes (a backslash, the
 * letter u and a hexadecimal code unit) into the characters those escapes encode.
 *
 * The input must start with an escape; every escape after the first one is located
 * by searching for the next backslash-u pair.
 *
 * @param dataStr the escaped source string, e.g. one or more consecutive escapes
 * @return the decoded text, or an empty string when dataStr is empty
 * @throws NumberFormatException if the text following a backslash-u pair is not
 *         a valid hexadecimal number
 */
static String decodeUnicode( final String dataStr ) {
    final StringBuilder buffer = new StringBuilder();
    if( dataStr.length() == 0 ) {
        return buffer.toString();
    }
    int start = 0;
    while( start > -1 ) {
        // indexOf takes a plain string, not a regex, so a single escaped backslash
        // is all that is needed to find the next escape.
        final int end = dataStr.indexOf( "\\u", start + 2 );
        // Skip the two-character prefix and take the hex digits up to the next escape.
        final String charStr = ( end == -1 )
                ? dataStr.substring( start + 2 )
                : dataStr.substring( start + 2, end );
        buffer.append( (char) Integer.parseInt( charStr, 16 ) ); // parse as base 16
        start = end; // -1 once the last escape has been consumed
    }
    return buffer.toString();
}
============================================================
/**
 * Encodes the value with gbEncoding and writes it to the stream as a
 * length-prefixed record: first the byte count as an int, then the bytes.
 * The length and the original value are also echoed to standard output.
 *
 * An IOException raised while writing is reported with printStackTrace and not rethrown.
 *
 * @param out   the stream that receives the record
 * @param value the text to encode and write
 */
public static void writeUnicode(final DataOutputStream out, final String value) {
    try {
        final String unicode = gbEncoding( value );
        final byte[] data = unicode.getBytes();
        final int dataLength = data.length;
        System.out.println( "Data Length is: " + dataLength );
        System.out.println( "Data is: " + value );
        out.writeInt( dataLength ); // the length of the encoded string goes first
        out.write( data, 0, dataLength ); // followed by the encoded string itself
    } catch (IOException e) {
        // Report the failure instead of dropping it silently.
        e.printStackTrace();
    }
}
/**
 * Encodes every char of the input as a backslash-u escape: a backslash, the letter u
 * and the char's code unit as exactly four lowercase hexadecimal digits.
 * The result is also echoed to standard output.
 *
 * @param gbString the text to encode
 * @return the escaped text; empty when the input is empty
 */
public static String gbEncoding( final String gbString ) {
    final StringBuilder unicodeBytes = new StringBuilder( gbString.length() * 6 );
    for( int index = 0; index < gbString.length(); index++ ) {
        final String hex = Integer.toHexString( gbString.charAt( index ) );
        // One backslash only: this is plain string building, not a regex.
        unicodeBytes.append( "\\u" );
        // Left-pad to four digits so that every escape has the same width.
        for( int pad = hex.length(); pad < 4; pad++ ) {
            unicodeBytes.append( '0' );
        }
        unicodeBytes.append( hex );
    }
    System.out.println( "unicodeBytes is: " + unicodeBytes );
    return unicodeBytes.toString();
}