|
|||||||||||||||||||
| Source file | Conditionals | Statements | Methods | TOTAL | |||||||||||||||
| GenericWriter.java | 100% | 93.1% | 100% | 95.7% |
|
||||||||||||||
| 1 | /* Copyright 2002-2004 Elliotte Rusty Harold | |
| 2 | ||
| 3 | This library is free software; you can redistribute it and/or modify | |
| 4 | it under the terms of version 2.1 of the GNU Lesser General Public | |
| 5 | License as published by the Free Software Foundation. | |
| 6 | ||
| 7 | This library is distributed in the hope that it will be useful, | |
| 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 10 | GNU Lesser General Public License for more details. | |
| 11 | ||
| 12 | You should have received a copy of the GNU Lesser General Public | |
| 13 | License along with this library; if not, write to the | |
| 14 | Free Software Foundation, Inc., 59 Temple Place, Suite 330, | |
| 15 | Boston, MA 02111-1307 USA | |
| 16 | ||
| 17 | You can contact Elliotte Rusty Harold by sending e-mail to | |
| 18 | elharo@metalab.unc.edu. Please include the word "XOM" in the | |
| 19 | subject line. The XOM home page is located at http://www.xom.nu/ | |
| 20 | */ | |
| 21 | ||
| 22 | package nu.xom; | |
| 23 | ||
| 24 | import java.io.ByteArrayOutputStream; | |
| 25 | import java.io.IOException; | |
| 26 | import java.io.OutputStreamWriter; | |
| 27 | import java.io.UnsupportedEncodingException; | |
| 28 | import java.io.Writer; | |
| 29 | import java.util.Locale; | |
| 30 | ||
| 31 | /** | |
| 32 | * <p> | |
| 33 | * <code>GenericWriter</code> is a hack that figures out whether a | |
| 34 | * character is or is not available in a particular encoding by writing | |
| 35 | * it onto an OutputStream and seeing whether or not the character | |
| 36 | * written is a question mark (Java's substitution character). | |
| 37 | * There's a more straightforward way to do this using | |
| 38 | * <code>java.nio.Charset</code> in Java 1.4, but I'm not willing to | |
| 39 | * assume Java 1.4. | |
| 40 | * </p> | |
| 41 | * | |
| 42 | * @author Elliotte Rusty Harold | |
| 43 | * @version 1.1b2 | |
| 44 | * | |
| 45 | */ | |
| 46 | class GenericWriter extends TextWriter { | |
| 47 | ||
| 48 | ||
| 49 | private final ByteArrayOutputStream bout; | |
| 50 | private final OutputStreamWriter wout; | |
| 51 | private final boolean isJapanese; | |
| 52 | ||
| 53 | ||
| 54 | 7 | GenericWriter(Writer out, String encoding) |
| 55 | throws UnsupportedEncodingException { | |
| 56 | ||
| 57 | 7 | super(out, encoding); |
| 58 | 7 | bout = new ByteArrayOutputStream(32); |
| 59 | 7 | wout = new OutputStreamWriter(bout, encoding); |
| 60 | 7 | encoding = encoding.toUpperCase(Locale.ENGLISH); |
| 61 | 7 | if (encoding.indexOf("EUC-JP") > -1 |
| 62 | || encoding.startsWith("EUC_JP") | |
| 63 | || encoding.equals("SHIFT_JIS") | |
| 64 | || encoding.equals("SJIS") | |
| 65 | || encoding.equals("ISO-2022-JP")) { | |
| 66 | 3 | isJapanese = true; |
| 67 | } | |
| 68 | else { | |
| 69 | 4 | isJapanese = false; |
| 70 | } | |
| 71 | ||
| 72 | } | |
| 73 | ||
| 74 | ||
| 75 | 1785561 | boolean needsEscaping(char c) { |
| 76 | ||
| 77 | // assume everything has at least the ASCII characters | |
| 78 | 1338471 | if (c <= 127) return false; |
| 79 | // work around various bugs in Japanese encodings | |
| 80 | 447090 | if (isJapanese) { |
| 81 | 3 | if (c == 0xA5) return true; // Yen symbol |
| 82 | 3 | if (c == 0x203E) return true; // Sun bugs in EUC-JP and SJIS |
| 83 | } | |
| 84 | ||
| 85 | 447084 | boolean result = false; |
| 86 | 447084 | try { |
| 87 | 447084 | wout.write(c); |
| 88 | 447084 | wout.flush(); |
| 89 | 447084 | byte[] data = bout.toByteArray(); |
| 90 | 1536 | if (data.length == 0) result = true; // surrogate pair |
| 91 | 347747 | else if (data[0] == '?') result = true; |
| 92 | // work around various bugs in Japanese encodings | |
| 93 | // especially in JDK 1.4.2_05 | |
| 94 | 57018 | else if (isJapanese && data[0] == 0x21) result = true; |
| 95 | } | |
| 96 | catch (IOException ex) { | |
| 97 | // There really shouldn't be any IOException here. | |
| 98 | // However character conversion bugs in Java 1.2 | |
| 99 | // sometimes throw one. In this case, we simply report that | |
| 100 | // the character needs escaping. | |
| 101 | 0 | return true; |
| 102 | } | |
| 103 | catch (Error err) { | |
| 104 | // This appears to be a wrapper around an undocumented | |
| 105 | // sun.io.UnknownCharacterException or some such. In any | |
| 106 | // case Java doesn't know how to output this character. | |
| 107 | 0 | return true; |
| 108 | } | |
| 109 | finally { | |
| 110 | 447084 | bout.reset(); |
| 111 | } | |
| 112 | 447084 | return result; |
| 113 | ||
| 114 | } | |
| 115 | ||
| 116 | ||
| 117 | } |
|
||||||||||