Ruby 3.3.0p0 (2023-12-25 revision 5124f9ac7513eb590c37717337c430cb93caa151)
encoding.h
Go to the documentation of this file.
1
6#ifndef PRISM_ENCODING_H
7#define PRISM_ENCODING_H
8
9#include "prism/defines.h"
11
12#include <assert.h>
13#include <stdbool.h>
14#include <stddef.h>
15#include <stdint.h>
16
/**
 * This struct defines the functions necessary to implement the encoding
 * interface.
 *
 * Every callback receives a pointer to bytes (`b`) and a signed count (`n`).
 * NOTE(review): `n` is presumably the number of bytes available at `b`;
 * this header does not state it -- confirm against encoding.c.
 */
typedef struct {
    /**
     * NOTE(review): presumably returns the byte width of the next character;
     * the contract is not visible in this header -- confirm.
     */
    size_t (*char_width)(const uint8_t *b, ptrdiff_t n);

    /**
     * NOTE(review): by analogy with pm_encoding_utf_8_alpha_char, presumably
     * returns the size of the next character if it is alphabetical -- confirm.
     */
    size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n);

    /**
     * NOTE(review): by analogy with pm_encoding_utf_8_alnum_char, presumably
     * returns the size of the next character if it is alphanumeric -- confirm.
     */
    size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);

    /**
     * NOTE(review): by analogy with pm_encoding_utf_8_isupper_char, presumably
     * returns true if the next character is uppercase -- confirm.
     */
    bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);

    /** The name of the encoding. */
    const char *name;

    /** True if the encoding is a multibyte encoding. */
    bool multibyte;
} pm_encoding_t;
63
/*
 * Single-bit flag constants. Each replacement list is fully parenthesized so
 * the macro expands as one operand next to any operator; written bare,
 * `X_BIT + 1` or `X_BIT * 2` would bind to the shift count instead of the
 * shifted value.
 *
 * NOTE(review): presumably these are the flags stored in the entries of
 * pm_encoding_unicode_table -- confirm against encoding.c.
 */

/** Bit 0 (value 1): alphabetic flag. */
#define PRISM_ENCODING_ALPHABETIC_BIT (1 << 0)

/** Bit 1 (value 2): alphanumeric flag. */
#define PRISM_ENCODING_ALPHANUMERIC_BIT (1 << 1)

/** Bit 2 (value 4): uppercase flag. */
#define PRISM_ENCODING_UPPERCASE_BIT (1 << 2)
81
/**
 * Return the size of the next character in the UTF-8 encoding if it is an
 * alphabetical character.
 *
 * @param b Pointer to the bytes to inspect.
 * @param n Signed count passed alongside `b`. NOTE(review): presumably the
 *     number of bytes available at `b` -- confirm against encoding.c.
 * @return The size of the next character. NOTE(review): the value returned
 *     when the character is not alphabetical is not visible here -- confirm.
 */
size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);

/**
 * Return the size of the next character in the UTF-8 encoding if it is an
 * alphanumeric character.
 *
 * @param b Pointer to the bytes to inspect.
 * @param n Signed count passed alongside `b`. NOTE(review): presumably the
 *     number of bytes available at `b` -- confirm against encoding.c.
 * @return The size of the next character. NOTE(review): the value returned
 *     when the character is not alphanumeric is not visible here -- confirm.
 */
size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);

/**
 * Return true if the next character in the UTF-8 encoding is an uppercase
 * character.
 *
 * @param b Pointer to the bytes to inspect.
 * @param n Signed count passed alongside `b`. NOTE(review): presumably the
 *     number of bytes available at `b` -- confirm against encoding.c.
 * @return True if the next character is an uppercase character.
 */
bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
114
/**
 * This lookup table is referenced in both the UTF-8 encoding file and the
 * parser directly. It holds 256 uint8_t entries.
 *
 * NOTE(review): presumably indexed by byte value, with each entry being a
 * combination of the PRISM_ENCODING_*_BIT flags -- confirm against encoding.c.
 */
extern const uint8_t pm_encoding_unicode_table[256];
122
/**
 * These are all of the encodings that prism supports.
 *
 * PM_ENCODING_UTF_8 is pinned to 0 and every following enumerator takes the
 * next consecutive value. PM_ENCODING_MAXIMUM is not an encoding: it is the
 * number of enumerators before it, and it is the length of the pm_encodings
 * table.
 */
typedef enum {
    PM_ENCODING_UTF_8 = 0,
    PM_ENCODING_ASCII_8BIT,
    PM_ENCODING_BIG5,
    PM_ENCODING_BIG5_HKSCS,
    PM_ENCODING_BIG5_UAO,
    PM_ENCODING_CESU_8,
    PM_ENCODING_CP51932,
    PM_ENCODING_CP850,
    PM_ENCODING_CP852,
    PM_ENCODING_CP855,
    PM_ENCODING_CP949,
    PM_ENCODING_CP950,
    PM_ENCODING_CP951,
    PM_ENCODING_EMACS_MULE,
    PM_ENCODING_EUC_JP,
    PM_ENCODING_EUC_JP_MS,
    PM_ENCODING_EUC_JIS_2004,
    PM_ENCODING_EUC_KR,
    PM_ENCODING_EUC_TW,
    PM_ENCODING_GB12345,
    PM_ENCODING_GB18030,
    PM_ENCODING_GB1988,
    PM_ENCODING_GB2312,
    PM_ENCODING_GBK,
    PM_ENCODING_IBM437,
    PM_ENCODING_IBM720,
    PM_ENCODING_IBM737,
    PM_ENCODING_IBM775,
    PM_ENCODING_IBM852,
    PM_ENCODING_IBM855,
    PM_ENCODING_IBM857,
    PM_ENCODING_IBM860,
    PM_ENCODING_IBM861,
    PM_ENCODING_IBM862,
    PM_ENCODING_IBM863,
    PM_ENCODING_IBM864,
    PM_ENCODING_IBM865,
    PM_ENCODING_IBM866,
    PM_ENCODING_IBM869,
    PM_ENCODING_ISO_8859_1,
    PM_ENCODING_ISO_8859_2,
    PM_ENCODING_ISO_8859_3,
    PM_ENCODING_ISO_8859_4,
    PM_ENCODING_ISO_8859_5,
    PM_ENCODING_ISO_8859_6,
    PM_ENCODING_ISO_8859_7,
    PM_ENCODING_ISO_8859_8,
    PM_ENCODING_ISO_8859_9,
    PM_ENCODING_ISO_8859_10,
    PM_ENCODING_ISO_8859_11,
    PM_ENCODING_ISO_8859_13,
    PM_ENCODING_ISO_8859_14,
    PM_ENCODING_ISO_8859_15,
    PM_ENCODING_ISO_8859_16,
    PM_ENCODING_KOI8_R,
    PM_ENCODING_KOI8_U,
    PM_ENCODING_MAC_CENT_EURO,
    PM_ENCODING_MAC_CROATIAN,
    PM_ENCODING_MAC_CYRILLIC,
    PM_ENCODING_MAC_GREEK,
    PM_ENCODING_MAC_ICELAND,
    PM_ENCODING_MAC_JAPANESE,
    PM_ENCODING_MAC_ROMAN,
    PM_ENCODING_MAC_ROMANIA,
    PM_ENCODING_MAC_THAI,
    PM_ENCODING_MAC_TURKISH,
    PM_ENCODING_MAC_UKRAINE,
    PM_ENCODING_SHIFT_JIS,
    PM_ENCODING_SJIS_DOCOMO,
    PM_ENCODING_SJIS_KDDI,
    PM_ENCODING_SJIS_SOFTBANK,
    PM_ENCODING_STATELESS_ISO_2022_JP,
    PM_ENCODING_STATELESS_ISO_2022_JP_KDDI,
    PM_ENCODING_TIS_620,
    PM_ENCODING_US_ASCII,
    PM_ENCODING_UTF8_MAC,
    PM_ENCODING_UTF8_DOCOMO,
    PM_ENCODING_UTF8_KDDI,
    PM_ENCODING_UTF8_SOFTBANK,
    PM_ENCODING_WINDOWS_1250,
    PM_ENCODING_WINDOWS_1251,
    PM_ENCODING_WINDOWS_1252,
    PM_ENCODING_WINDOWS_1253,
    PM_ENCODING_WINDOWS_1254,
    PM_ENCODING_WINDOWS_1255,
    PM_ENCODING_WINDOWS_1256,
    PM_ENCODING_WINDOWS_1257,
    PM_ENCODING_WINDOWS_1258,
    PM_ENCODING_WINDOWS_31J,
    PM_ENCODING_WINDOWS_874,
    PM_ENCODING_MAXIMUM
} pm_encoding_type_t;
219
223extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
224
229#define PM_ENCODING_UTF_8_ENTRY (&pm_encodings[PM_ENCODING_UTF_8])
230
236#define PM_ENCODING_US_ASCII_ENTRY (&pm_encodings[PM_ENCODING_US_ASCII])
237
246const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end);
247
248#endif
A custom strncasecmp implementation.
Macro definitions used throughout the prism library.
bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n)
Return true if the next character in the UTF-8 encoding is an uppercase character.
Definition encoding.c:2332
const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM]
This is the table of all of the encodings that prism supports.
Definition encoding.c:4217
pm_encoding_type_t
These are all of the encodings that prism supports.
Definition encoding.h:126
const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end)
Parse the given name of an encoding and return a pointer to the corresponding encoding struct if one ...
Definition encoding.c:4945
const uint8_t pm_encoding_unicode_table[256]
This lookup table is referenced in both the UTF-8 encoding file and the parser directly in order to s...
Definition encoding.c:2161
size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n)
Return the size of the next character in the UTF-8 encoding if it is an alphabetical character.
Definition encoding.c:2292
size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n)
Return the size of the next character in the UTF-8 encoding if it is an alphanumeric character.
Definition encoding.c:2312
C99 shim for <stdbool.h>
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
bool multibyte
Return true if the encoding is a multibyte encoding.
Definition encoding.h:61
const char * name
The name of the encoding.
Definition encoding.h:56