1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
|
/**
 * Turns backslash-escaped byte literals inside a string back into the
 * characters they encode, e.g. the eight characters {@code \xc3\xa9} become "é".
 *
 * <p>The input is encoded as UTF-8, passed through {@code unescapeByte}, and the
 * resulting bytes are decoded as UTF-8 again. This is a best-effort conversion:
 * if anything goes wrong (malformed escape sequence, {@code null} input, ...)
 * the input is handed back unchanged and nothing is logged.
 *
 * @param s the possibly escaped text
 * @return the unescaped text, or {@code s} itself when it cannot be unescaped
 */
public static String unescapeMultiByteUtf8Literals(final String s) {
    // Start from the fallback value; it is only replaced when decoding succeeds.
    String outcome = s;
    try {
        final byte[] escapedBytes = s.getBytes("UTF-8");
        final byte[] decodedBytes = unescapeByte(escapedBytes);
        outcome = new String(decodedBytes, "UTF-8");
    } catch (Exception e) {
        // Deliberately silent: the caller keeps working with the original text.
    }
    return outcome;
}
/**
 * Decodes backslash escape sequences in a byte array.
 *
 * <p>Recognised sequences are {@code \xHH} (two hex digits, producing one raw
 * byte), {@code \n}, {@code \t}, {@code \r}, {@code \\} and {@code \'}. Every
 * other byte is copied through untouched.
 *
 * @param escaped the bytes that may contain escape sequences
 * @return a new array holding the decoded bytes, sized to the decoded length
 * @throws Exception if the input ends in the middle of an escape sequence, a
 *         {@code \x} escape contains a non-hexadecimal digit, or the byte
 *         following a backslash is not one of the recognised escapes
 */
private static byte[] unescapeByte(final byte[] escaped) throws Exception {
    // Each escape sequence decodes to a single byte, so the output can never be
    // longer than the input; the surplus is trimmed off at the end.
    final byte[] unescaped = new byte[escaped.length];
    int posTarget = 0;
    for (int posSource = 0; posSource < escaped.length; posSource++) {
        final byte current = escaped[posSource];
        // ordinary byte: copy through
        if (current != '\\') {
            unescaped[posTarget++] = current;
            continue;
        }
        // a backslash must be followed by at least one more byte
        if (posSource + 1 >= escaped.length) {
            throw new Exception("String incorrectly escaped, ends with escape character.");
        }
        final byte marker = escaped[posSource + 1];
        switch (marker) {
            case 'x': {
                // "\xHH" needs two more bytes after the 'x'
                if (posSource + 3 >= escaped.length) {
                    throw new Exception("String incorrectly escaped, ends early with incorrect hex encoding.");
                }
                final int high = Character.digit(escaped[posSource + 2], 16);
                final int low = Character.digit(escaped[posSource + 3], 16);
                // Character.digit yields -1 for anything that is not a hex digit;
                // reject it instead of folding -1 into a bogus output byte.
                if (high < 0 || low < 0) {
                    throw new Exception("String incorrectly escaped, invalid hex digit in \\x escape at byte offset "
                            + posSource);
                }
                unescaped[posTarget++] = (byte) ((high << 4) | low);
                posSource += 3; // skip 'x' and both digits
                break;
            }
            case 'n':
                unescaped[posTarget++] = '\n';
                posSource++;
                break;
            case 't':
                unescaped[posTarget++] = '\t';
                posSource++;
                break;
            case 'r':
                unescaped[posTarget++] = '\r';
                posSource++;
                break;
            case '\\':
            case '\'':
                // escaped backslash / single quote stand for themselves
                unescaped[posTarget++] = marker;
                posSource++;
                break;
            default:
                throw new Exception("String incorrectly escaped, invalid escaped character");
        }
    }
    // return a byte array, not a string; callers pick the charset for decoding
    final byte[] result = new byte[posTarget];
    System.arraycopy(unescaped, 0, result, 0, posTarget);
    return result;
}
@POST
@Path("/send_photo")
@Consumes(MediaType.MULTIPART_FORM_DATA + ";charset=utf-8")
public Response insertPhoto(@FormDataParam("photo_fileStream") InputStream photo_fileStream_,
@FormDataParam("comment") String comment{
comment = unescapeMultiByteUtf8Literals(comment);
.....
} |
Partager