1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
| Code de test :
Sub main()
Debug.Print Encode_UTF8("œ" )
Debug.Print Decode_UTF8(Encode_UTF8("œ" ))
Debug.Print Decode_UTF8("éa" )
Debug.Print isUTF8("éa" )
Debug.Print isUTF8("abcde" )
End Sub
Code principal :
Option Explicit
' Char. number range | UTF-8 octet sequence
' (hexadecimal) | (binary)
' --------------------+---------------------------------------------
' 0000 0000-0000 007F | 0xxxxxxx
' 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
' 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
' 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
Public Function Encode_UTF8(astr)
Dim c
Dim n
Dim utftext
utftext = ""
n = 1
Do While n <= Len(astr)
c = AscW(Mid(astr, n, 1))
If c < 128 Then
utftext = utftext + Chr(c)
ElseIf ((c >= 128) And (c < 2048)) Then
utftext = utftext + Chr(((c \ 64) Or 192))
utftext = utftext + Chr(((c And 63) Or 128))
ElseIf ((c >= 2048) And (c < 65536)) Then
utftext = utftext + Chr(((c \ 4096) Or 224))
utftext = utftext + Chr((((c \ 64) And 63) Or 128))
utftext = utftext + Chr(((c And 63) Or 128))
Else ' c >= 65536
utftext = utftext + Chr(((c \ 262144) Or 240))
utftext = utftext + Chr(((((c \ 4096) And 63)) Or 128))
utftext = utftext + Chr((((c \ 64) And 63) Or 128))
utftext = utftext + Chr(((c And 63) Or 128))
End If
n = n + 1
Loop
Encode_UTF8 = utftext
End Function
' Char. number range | UTF-8 octet sequence
' (hexadecimal) | (binary)
' --------------------+---------------------------------------------
' 0000 0000-0000 007F | 0xxxxxxx
' 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
' 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
' 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
Public Function Decode_UTF8(astr)
Dim c0, c1, c2, c3
Dim n
Dim unitext
If isUTF8(astr) = False Then
Decode_UTF8 = astr
Exit Function
End If
unitext = ""
n = 1
Do While n <= Len(astr)
c0 = Asc(Mid(astr, n, 1))
If n <= Len(astr) - 1 Then
c1 = Asc(Mid(astr, n + 1, 1))
Else
c1 = 0
End If
If n <= Len(astr) - 2 Then
c2 = Asc(Mid(astr, n + 2, 1))
Else
c2 = 0
End If
If n <= Len(astr) - 3 Then
c3 = Asc(Mid(astr, n + 3, 1))
Else
c3 = 0
End If
If (c0 And 240) = 240 And (c1 And 128) = 128 And (c2 And 128) = 128 And (c3 And 128) = 128 Then
unitext = unitext + ChrW((c0 - 240) * 65536 + (c1 - 128) * 4096) + (c2 - 128) * 64 + (c3 - 128)
n = n + 4
ElseIf (c0 And 224) = 224 And (c1 And 128) = 128 And (c2 And 128) = 128 Then
unitext = unitext + ChrW((c0 - 224) * 4096 + (c1 - 128) * 64 + (c2 - 128))
n = n + 3
ElseIf (c0 And 192) = 192 And (c1 And 128) = 128 Then
unitext = unitext + ChrW((c0 - 192) * 64 + (c1 - 128))
n = n + 2
ElseIf (c0 And 128) = 128 Then
unitext = unitext + ChrW(c0 And 127)
n = n + 1
Else ' c0 < 128
unitext = unitext + ChrW(c0)
n = n + 1
End If
Loop
Decode_UTF8 = unitext
End Function
' Char. number range | UTF-8 octet sequence
' (hexadecimal) | (binary)
' --------------------+---------------------------------------------
' 0000 0000-0000 007F | 0xxxxxxx
' 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
' 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
' 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
Public Function isUTF8(astr)
Dim c0, c1, c2, c3
Dim n
isUTF8 = True
n = 1
Do While n <= Len(astr)
c0 = Asc(Mid(astr, n, 1))
If n <= Len(astr) - 1 Then
c1 = Asc(Mid(astr, n + 1, 1))
Else
c1 = 0
End If
If n <= Len(astr) - 2 Then
c2 = Asc(Mid(astr, n + 2, 1))
Else
c2 = 0
End If
If n <= Len(astr) - 3 Then
c3 = Asc(Mid(astr, n + 3, 1))
Else
c3 = 0
End If
If (c0 And 240) = 240 Then
If (c1 And 128) = 128 And (c2 And 128) = 128 And (c3 And 128) = 128 Then
n = n + 4
Else
isUTF8 = False
Exit Function
End If
ElseIf (c0 And 224) = 224 Then
If (c1 And 128) = 128 And (c2 And 128) = 128 Then
n = n + 3
Else
isUTF8 = False
Exit Function
End If
ElseIf (c0 And 192) = 192 Then
If (c1 And 128) = 128 Then
n = n + 2
Else
isUTF8 = False
Exit Function
End If
ElseIf (c0 And 128) = 0 Then
n = n + 1
Else
isUTF8 = False
Exit Function
End If
Loop
End Function |
Partager