Ruby 2.7.7p221 (2022-11-24 revision 168ec2b1e5ad0e4688e963d9de019557c78feed9)
psych_parser.c
Go to the documentation of this file.
1#include <psych.h>
2
4
5static ID id_read;
6static ID id_path;
7static ID id_empty;
8static ID id_start_stream;
9static ID id_end_stream;
10static ID id_start_document;
11static ID id_end_document;
12static ID id_alias;
13static ID id_scalar;
14static ID id_start_sequence;
15static ID id_end_sequence;
16static ID id_start_mapping;
17static ID id_end_mapping;
18static ID id_event_location;
19
/*
 * Associate _str with the encoding index _yaml_enc and, when _internal_enc
 * is non-NULL, rebind _str to the result of exporting it to _internal_enc.
 *
 * _str must be an assignable VALUE: it is evaluated more than once and may
 * be overwritten.
 */
#define PSYCH_TRANSCODE(_str, _yaml_enc, _internal_enc) \
    do { \
        rb_enc_associate_index((_str), (_yaml_enc)); \
        if (_internal_enc) { \
            (_str) = rb_str_export_to_enc((_str), (_internal_enc)); \
        } \
    } while (0)
26
27static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read)
28{
29 VALUE io = (VALUE)data;
30 VALUE string = rb_funcall(io, id_read, 1, INT2NUM(size));
31
32 *read = 0;
33
34 if(! NIL_P(string)) {
35 void * str = (void *)StringValuePtr(string);
36 *read = (size_t)RSTRING_LEN(string);
37 memcpy(buf, str, *read);
38 }
39
40 return 1;
41}
42
43static void dealloc(void * ptr)
44{
45 yaml_parser_t * parser;
46
47 parser = (yaml_parser_t *)ptr;
48 yaml_parser_delete(parser);
49 xfree(parser);
50}
51
#if 0
/*
 * Placeholder size-reporting hook (compiled out).  Always reports 0 until
 * the parser's real footprint is computed.
 */
static size_t memsize(const void *ptr)
{
    /* TODO: calculate parser's size */
    const yaml_parser_t *yaml = ptr;

    return 0;
}
#endif
60
61static const rb_data_type_t psych_parser_type = {
62 "Psych/parser",
63 {0, dealloc, 0,},
64 0, 0,
65#ifdef RUBY_TYPED_FREE_IMMEDIATELY
67#endif
68};
69
70static VALUE allocate(VALUE klass)
71{
72 yaml_parser_t * parser;
73 VALUE obj = TypedData_Make_Struct(klass, yaml_parser_t, &psych_parser_type, parser);
74
75 yaml_parser_initialize(parser);
76
77 return obj;
78}
79
80static VALUE make_exception(yaml_parser_t * parser, VALUE path)
81{
82 size_t line, column;
83 VALUE ePsychSyntaxError;
84
85 line = parser->context_mark.line + 1;
86 column = parser->context_mark.column + 1;
87
88 ePsychSyntaxError = rb_const_get(mPsych, rb_intern("SyntaxError"));
89
90 return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6,
91 path,
92 INT2NUM(line),
93 INT2NUM(column),
94 INT2NUM(parser->problem_offset),
95 parser->problem ? rb_usascii_str_new2(parser->problem) : Qnil,
96 parser->context ? rb_usascii_str_new2(parser->context) : Qnil);
97}
98
99static VALUE transcode_string(VALUE src, int * parser_encoding)
100{
101 int utf8 = rb_utf8_encindex();
102 int utf16le = rb_enc_find_index("UTF-16LE");
103 int utf16be = rb_enc_find_index("UTF-16BE");
104 int source_encoding = rb_enc_get_index(src);
105
106 if (source_encoding == utf8) {
107 *parser_encoding = YAML_UTF8_ENCODING;
108 return src;
109 }
110
111 if (source_encoding == utf16le) {
112 *parser_encoding = YAML_UTF16LE_ENCODING;
113 return src;
114 }
115
116 if (source_encoding == utf16be) {
117 *parser_encoding = YAML_UTF16BE_ENCODING;
118 return src;
119 }
120
123
124 *parser_encoding = YAML_UTF8_ENCODING;
125 return src;
126}
127
128static VALUE transcode_io(VALUE src, int * parser_encoding)
129{
130 VALUE io_external_encoding;
131 int io_external_enc_index;
132
133 io_external_encoding = rb_funcall(src, rb_intern("external_encoding"), 0);
134
135 /* if no encoding is returned, assume ascii8bit. */
136 if (NIL_P(io_external_encoding)) {
137 io_external_enc_index = rb_ascii8bit_encindex();
138 } else {
139 io_external_enc_index = rb_to_encoding_index(io_external_encoding);
140 }
141
142 /* Treat US-ASCII as utf_8 */
143 if (io_external_enc_index == rb_usascii_encindex()) {
144 *parser_encoding = YAML_UTF8_ENCODING;
145 return src;
146 }
147
148 if (io_external_enc_index == rb_utf8_encindex()) {
149 *parser_encoding = YAML_UTF8_ENCODING;
150 return src;
151 }
152
153 if (io_external_enc_index == rb_enc_find_index("UTF-16LE")) {
154 *parser_encoding = YAML_UTF16LE_ENCODING;
155 return src;
156 }
157
158 if (io_external_enc_index == rb_enc_find_index("UTF-16BE")) {
159 *parser_encoding = YAML_UTF16BE_ENCODING;
160 return src;
161 }
162
163 /* Just guess on ASCII-8BIT */
164 if (io_external_enc_index == rb_ascii8bit_encindex()) {
165 *parser_encoding = YAML_ANY_ENCODING;
166 return src;
167 }
168
169 /* If the external encoding is something we don't know how to handle,
170 * fall back to YAML_ANY_ENCODING. */
171 *parser_encoding = YAML_ANY_ENCODING;
172
173 return src;
174}
175
176static VALUE protected_start_stream(VALUE pointer)
177{
178 VALUE *args = (VALUE *)pointer;
179 return rb_funcall(args[0], id_start_stream, 1, args[1]);
180}
181
182static VALUE protected_start_document(VALUE pointer)
183{
184 VALUE *args = (VALUE *)pointer;
185 return rb_funcall3(args[0], id_start_document, 3, args + 1);
186}
187
188static VALUE protected_end_document(VALUE pointer)
189{
190 VALUE *args = (VALUE *)pointer;
191 return rb_funcall(args[0], id_end_document, 1, args[1]);
192}
193
194static VALUE protected_alias(VALUE pointer)
195{
196 VALUE *args = (VALUE *)pointer;
197 return rb_funcall(args[0], id_alias, 1, args[1]);
198}
199
200static VALUE protected_scalar(VALUE pointer)
201{
202 VALUE *args = (VALUE *)pointer;
203 return rb_funcall3(args[0], id_scalar, 6, args + 1);
204}
205
206static VALUE protected_start_sequence(VALUE pointer)
207{
208 VALUE *args = (VALUE *)pointer;
209 return rb_funcall3(args[0], id_start_sequence, 4, args + 1);
210}
211
212static VALUE protected_end_sequence(VALUE handler)
213{
214 return rb_funcall(handler, id_end_sequence, 0);
215}
216
217static VALUE protected_start_mapping(VALUE pointer)
218{
219 VALUE *args = (VALUE *)pointer;
220 return rb_funcall3(args[0], id_start_mapping, 4, args + 1);
221}
222
223static VALUE protected_end_mapping(VALUE handler)
224{
225 return rb_funcall(handler, id_end_mapping, 0);
226}
227
228static VALUE protected_empty(VALUE handler)
229{
230 return rb_funcall(handler, id_empty, 0);
231}
232
233static VALUE protected_end_stream(VALUE handler)
234{
235 return rb_funcall(handler, id_end_stream, 0);
236}
237
238static VALUE protected_event_location(VALUE pointer)
239{
240 VALUE *args = (VALUE *)pointer;
241 return rb_funcall3(args[0], id_event_location, 4, args + 1);
242}
243
244/*
245 * call-seq:
246 * parser.parse(yaml)
247 *
248 * Parse the YAML document contained in +yaml+. Events will be called on
249 * the handler set on the parser instance.
250 *
251 * See Psych::Parser and Psych::Parser#handler
252 */
253static VALUE parse(int argc, VALUE *argv, VALUE self)
254{
255 VALUE yaml, path;
256 yaml_parser_t * parser;
257 yaml_event_t event;
258 int done = 0;
259 int state = 0;
260 int parser_encoding = YAML_ANY_ENCODING;
261 int encoding = rb_utf8_encindex();
262 rb_encoding * internal_enc = rb_default_internal_encoding();
263 VALUE handler = rb_iv_get(self, "@handler");
264
265 if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) {
266 if(rb_respond_to(yaml, id_path))
267 path = rb_funcall(yaml, id_path, 0);
268 else
269 path = rb_str_new2("<unknown>");
270 }
271
272 TypedData_Get_Struct(self, yaml_parser_t, &psych_parser_type, parser);
273
274 yaml_parser_delete(parser);
275 yaml_parser_initialize(parser);
276
277 if (rb_respond_to(yaml, id_read)) {
278 yaml = transcode_io(yaml, &parser_encoding);
279 yaml_parser_set_encoding(parser, parser_encoding);
280 yaml_parser_set_input(parser, io_reader, (void *)yaml);
281 } else {
282 StringValue(yaml);
283 yaml = transcode_string(yaml, &parser_encoding);
284 yaml_parser_set_encoding(parser, parser_encoding);
285 yaml_parser_set_input_string(
286 parser,
287 (const unsigned char *)RSTRING_PTR(yaml),
288 (size_t)RSTRING_LEN(yaml)
289 );
290 }
291
292 while(!done) {
293 VALUE event_args[5];
294 VALUE start_line, start_column, end_line, end_column;
295
296 if(!yaml_parser_parse(parser, &event)) {
297 VALUE exception;
298
299 exception = make_exception(parser, path);
300 yaml_parser_delete(parser);
301 yaml_parser_initialize(parser);
302
303 rb_exc_raise(exception);
304 }
305
306 start_line = INT2NUM((long)event.start_mark.line);
307 start_column = INT2NUM((long)event.start_mark.column);
308 end_line = INT2NUM((long)event.end_mark.line);
309 end_column = INT2NUM((long)event.end_mark.column);
310
311 event_args[0] = handler;
312 event_args[1] = start_line;
313 event_args[2] = start_column;
314 event_args[3] = end_line;
315 event_args[4] = end_column;
316 rb_protect(protected_event_location, (VALUE)event_args, &state);
317
318 switch(event.type) {
319 case YAML_STREAM_START_EVENT:
320 {
321 VALUE args[2];
322
323 args[0] = handler;
324 args[1] = INT2NUM((long)event.data.stream_start.encoding);
325 rb_protect(protected_start_stream, (VALUE)args, &state);
326 }
327 break;
328 case YAML_DOCUMENT_START_EVENT:
329 {
330 VALUE args[4];
331 /* Get a list of tag directives (if any) */
332 VALUE tag_directives = rb_ary_new();
333 /* Grab the document version */
334 VALUE version = event.data.document_start.version_directive ?
336 (long)2,
337 INT2NUM((long)event.data.document_start.version_directive->major),
338 INT2NUM((long)event.data.document_start.version_directive->minor)
339 ) : rb_ary_new();
340
341 if(event.data.document_start.tag_directives.start) {
342 yaml_tag_directive_t *start =
343 event.data.document_start.tag_directives.start;
344 yaml_tag_directive_t *end =
345 event.data.document_start.tag_directives.end;
346 for(; start != end; start++) {
347 VALUE handle = Qnil;
348 VALUE prefix = Qnil;
349 if(start->handle) {
350 handle = rb_str_new2((const char *)start->handle);
351 PSYCH_TRANSCODE(handle, encoding, internal_enc);
352 }
353
354 if(start->prefix) {
355 prefix = rb_str_new2((const char *)start->prefix);
356 PSYCH_TRANSCODE(prefix, encoding, internal_enc);
357 }
358
359 rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
360 }
361 }
362 args[0] = handler;
363 args[1] = version;
364 args[2] = tag_directives;
365 args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse;
366 rb_protect(protected_start_document, (VALUE)args, &state);
367 }
368 break;
369 case YAML_DOCUMENT_END_EVENT:
370 {
371 VALUE args[2];
372
373 args[0] = handler;
374 args[1] = event.data.document_end.implicit == 1 ? Qtrue : Qfalse;
375 rb_protect(protected_end_document, (VALUE)args, &state);
376 }
377 break;
378 case YAML_ALIAS_EVENT:
379 {
380 VALUE args[2];
381 VALUE alias = Qnil;
382 if(event.data.alias.anchor) {
383 alias = rb_str_new2((const char *)event.data.alias.anchor);
384 PSYCH_TRANSCODE(alias, encoding, internal_enc);
385 }
386
387 args[0] = handler;
388 args[1] = alias;
389 rb_protect(protected_alias, (VALUE)args, &state);
390 }
391 break;
392 case YAML_SCALAR_EVENT:
393 {
394 VALUE args[7];
395 VALUE anchor = Qnil;
396 VALUE tag = Qnil;
397 VALUE plain_implicit, quoted_implicit, style;
398 VALUE val = rb_str_new(
399 (const char *)event.data.scalar.value,
400 (long)event.data.scalar.length
401 );
402
403 PSYCH_TRANSCODE(val, encoding, internal_enc);
404
405 if(event.data.scalar.anchor) {
406 anchor = rb_str_new2((const char *)event.data.scalar.anchor);
407 PSYCH_TRANSCODE(anchor, encoding, internal_enc);
408 }
409
410 if(event.data.scalar.tag) {
411 tag = rb_str_new2((const char *)event.data.scalar.tag);
412 PSYCH_TRANSCODE(tag, encoding, internal_enc);
413 }
414
415 plain_implicit =
416 event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue;
417
418 quoted_implicit =
419 event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue;
420
421 style = INT2NUM((long)event.data.scalar.style);
422
423 args[0] = handler;
424 args[1] = val;
425 args[2] = anchor;
426 args[3] = tag;
427 args[4] = plain_implicit;
428 args[5] = quoted_implicit;
429 args[6] = style;
430 rb_protect(protected_scalar, (VALUE)args, &state);
431 }
432 break;
433 case YAML_SEQUENCE_START_EVENT:
434 {
435 VALUE args[5];
436 VALUE anchor = Qnil;
437 VALUE tag = Qnil;
438 VALUE implicit, style;
439 if(event.data.sequence_start.anchor) {
440 anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
441 PSYCH_TRANSCODE(anchor, encoding, internal_enc);
442 }
443
444 tag = Qnil;
445 if(event.data.sequence_start.tag) {
446 tag = rb_str_new2((const char *)event.data.sequence_start.tag);
447 PSYCH_TRANSCODE(tag, encoding, internal_enc);
448 }
449
450 implicit =
451 event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue;
452
453 style = INT2NUM((long)event.data.sequence_start.style);
454
455 args[0] = handler;
456 args[1] = anchor;
457 args[2] = tag;
458 args[3] = implicit;
459 args[4] = style;
460
461 rb_protect(protected_start_sequence, (VALUE)args, &state);
462 }
463 break;
464 case YAML_SEQUENCE_END_EVENT:
465 rb_protect(protected_end_sequence, handler, &state);
466 break;
467 case YAML_MAPPING_START_EVENT:
468 {
469 VALUE args[5];
470 VALUE anchor = Qnil;
471 VALUE tag = Qnil;
472 VALUE implicit, style;
473 if(event.data.mapping_start.anchor) {
474 anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
475 PSYCH_TRANSCODE(anchor, encoding, internal_enc);
476 }
477
478 if(event.data.mapping_start.tag) {
479 tag = rb_str_new2((const char *)event.data.mapping_start.tag);
480 PSYCH_TRANSCODE(tag, encoding, internal_enc);
481 }
482
483 implicit =
484 event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue;
485
486 style = INT2NUM((long)event.data.mapping_start.style);
487
488 args[0] = handler;
489 args[1] = anchor;
490 args[2] = tag;
491 args[3] = implicit;
492 args[4] = style;
493
494 rb_protect(protected_start_mapping, (VALUE)args, &state);
495 }
496 break;
497 case YAML_MAPPING_END_EVENT:
498 rb_protect(protected_end_mapping, handler, &state);
499 break;
500 case YAML_NO_EVENT:
501 rb_protect(protected_empty, handler, &state);
502 break;
503 case YAML_STREAM_END_EVENT:
504 rb_protect(protected_end_stream, handler, &state);
505 done = 1;
506 break;
507 }
508 yaml_event_delete(&event);
509 if (state) rb_jump_tag(state);
510 }
511
512 return self;
513}
514
515/*
516 * call-seq:
517 * parser.mark # => #<Psych::Parser::Mark>
518 *
519 * Returns a Psych::Parser::Mark object that contains line, column, and index
520 * information.
521 */
522static VALUE mark(VALUE self)
523{
524 VALUE mark_klass;
525 VALUE args[3];
526 yaml_parser_t * parser;
527
528 TypedData_Get_Struct(self, yaml_parser_t, &psych_parser_type, parser);
529 mark_klass = rb_const_get_at(cPsychParser, rb_intern("Mark"));
530 args[0] = INT2NUM(parser->mark.index);
531 args[1] = INT2NUM(parser->mark.line);
532 args[2] = INT2NUM(parser->mark.column);
533
534 return rb_class_new_instance(3, args, mark_klass);
535}
536
538{
539#undef rb_intern
540#if 0
541 mPsych = rb_define_module("Psych");
542#endif
543
546
547 /* Any encoding: Let the parser choose the encoding */
548 rb_define_const(cPsychParser, "ANY", INT2NUM(YAML_ANY_ENCODING));
549
550 /* UTF-8 Encoding */
551 rb_define_const(cPsychParser, "UTF8", INT2NUM(YAML_UTF8_ENCODING));
552
553 /* UTF-16-LE Encoding with BOM */
554 rb_define_const(cPsychParser, "UTF16LE", INT2NUM(YAML_UTF16LE_ENCODING));
555
556 /* UTF-16-BE Encoding with BOM */
557 rb_define_const(cPsychParser, "UTF16BE", INT2NUM(YAML_UTF16BE_ENCODING));
558
559 rb_require("psych/syntax_error");
560
561 rb_define_method(cPsychParser, "parse", parse, -1);
562 rb_define_method(cPsychParser, "mark", mark, 0);
563
564 id_read = rb_intern("read");
565 id_path = rb_intern("path");
566 id_empty = rb_intern("empty");
567 id_start_stream = rb_intern("start_stream");
568 id_end_stream = rb_intern("end_stream");
569 id_start_document = rb_intern("start_document");
570 id_end_document = rb_intern("end_document");
571 id_alias = rb_intern("alias");
572 id_scalar = rb_intern("scalar");
573 id_start_sequence = rb_intern("start_sequence");
574 id_end_sequence = rb_intern("end_sequence");
575 id_start_mapping = rb_intern("start_mapping");
576 id_end_mapping = rb_intern("end_mapping");
577 id_event_location = rb_intern("event_location");
578}
579/* vim: set noet sws=4 sw=4: */
struct RIMemo * ptr
Definition: debug.c:65
int rb_enc_get_index(VALUE obj)
Definition: encoding.c:779
int rb_to_encoding_index(VALUE enc)
Definition: encoding.c:197
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1328
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1512
int rb_utf8_encindex(void)
Definition: encoding.c:1334
int rb_ascii8bit_encindex(void)
Definition: encoding.c:1322
int rb_enc_find_index(const char *name)
Definition: encoding.c:693
int rb_usascii_encindex(void)
Definition: encoding.c:1346
VALUE rb_str_export_to_enc(VALUE, rb_encoding *)
Definition: string.c:1135
char str[HTML_ESCAPE_MAX_LEN+1]
Definition: escape.c:18
ID id_alias
Definition: eventids1.c:4
VALUE rb_define_class_under(VALUE, const char *, VALUE)
Defines a class under the namespace of outer.
Definition: class.c:711
VALUE rb_define_module(const char *)
Definition: class.c:785
VALUE rb_cObject
Object class.
Definition: ruby.h:2012
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
Definition: eval.c:668
VALUE rb_protect(VALUE(*)(VALUE), VALUE, int *)
Protects a function call from potential global escapes from the function.
Definition: eval.c:1072
void rb_jump_tag(int tag)
Continues the exception caught by rb_protect() and rb_eval_string_protect().
Definition: eval.c:884
VALUE rb_class_new_instance(int, const VALUE *, VALUE)
Allocates and initializes an instance of klass.
Definition: object.c:1955
const char * alias
Definition: nkf.c:1159
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4322
VALUE mPsych
Definition: psych.c:21
VALUE cPsychParser
Definition: psych_parser.c:3
#define PSYCH_TRANSCODE(_str, _yaml_enc, _internal_enc)
Definition: psych_parser.c:20
void Init_psych_parser(void)
Definition: psych_parser.c:537
#define rb_str_new2
use StringValue() instead")))
#define RSTRING_LEN(str)
VALUE rb_const_get(VALUE, ID)
Definition: variable.c:2391
#define StringValuePtr(v)
#define xfree
const VALUE VALUE obj
#define RSTRING_PTR(str)
#define rb_str_new(str, len)
#define NIL_P(v)
int rb_respond_to(VALUE, ID)
Definition: vm_method.c:2207
const char const char *typedef unsigned long VALUE
VALUE rb_ary_push(VALUE, VALUE)
Definition: array.c:1195
__inline__ const void *__restrict__ src
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
#define INT2NUM(x)
VALUE rb_const_get_at(VALUE, ID)
Definition: variable.c:2397
void rb_define_const(VALUE, const char *, VALUE)
Definition: variable.c:2891
#define RB_GC_GUARD(v)
#define RUBY_TYPED_FREE_IMMEDIATELY
#define TypedData_Get_Struct(obj, type, data_type, sval)
#define rb_ary_new3
#define rb_funcall(recv, mid, argc,...)
VALUE rb_ary_new(void)
Definition: array.c:723
#define rb_scan_args(argc, argvp, fmt,...)
#define rb_usascii_str_new2
#define rb_intern(str)
VALUE rb_require(const char *)
Definition: load.c:1161
unsigned int size
#define Qtrue
long unsigned int size_t
#define Qnil
#define Qfalse
void * memcpy(void *__restrict__, const void *__restrict__, size_t)
#define TypedData_Make_Struct(klass, type, data_type, sval)
const VALUE * argv
VALUE rb_iv_get(VALUE, const char *)
Definition: variable.c:3305
#define rb_funcall3
unsigned long ID
void rb_define_method(VALUE, const char *, VALUE(*)(), int)
_ssize_t read(int __fd, void *__buf, size_t __nbyte)
unsigned long VALUE
Definition: ruby.h:102