Ruby 3.3.0p0 (2023-12-25 revision 5124f9ac7513eb590c37717337c430cb93caa151)
parser.h
Go to the documentation of this file.
1
6#ifndef PRISM_PARSER_H
7#define PRISM_PARSER_H
8
9#include "prism/ast.h"
10#include "prism/defines.h"
11#include "prism/encoding.h"
13#include "prism/util/pm_list.h"
17
18#include <stdbool.h>
19
20// TODO: remove this by renaming the original flag
24#define PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS PM_KEYWORD_HASH_NODE_FLAGS_STATIC_KEYS
25
31typedef enum {
32 PM_LEX_STATE_BIT_BEG,
33 PM_LEX_STATE_BIT_END,
34 PM_LEX_STATE_BIT_ENDARG,
35 PM_LEX_STATE_BIT_ENDFN,
36 PM_LEX_STATE_BIT_ARG,
37 PM_LEX_STATE_BIT_CMDARG,
38 PM_LEX_STATE_BIT_MID,
39 PM_LEX_STATE_BIT_FNAME,
40 PM_LEX_STATE_BIT_DOT,
41 PM_LEX_STATE_BIT_CLASS,
42 PM_LEX_STATE_BIT_LABEL,
43 PM_LEX_STATE_BIT_LABELED,
44 PM_LEX_STATE_BIT_FITEM
46
51typedef enum {
52 PM_LEX_STATE_NONE = 0,
53 PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG),
54 PM_LEX_STATE_END = (1 << PM_LEX_STATE_BIT_END),
55 PM_LEX_STATE_ENDARG = (1 << PM_LEX_STATE_BIT_ENDARG),
56 PM_LEX_STATE_ENDFN = (1 << PM_LEX_STATE_BIT_ENDFN),
57 PM_LEX_STATE_ARG = (1 << PM_LEX_STATE_BIT_ARG),
58 PM_LEX_STATE_CMDARG = (1 << PM_LEX_STATE_BIT_CMDARG),
59 PM_LEX_STATE_MID = (1 << PM_LEX_STATE_BIT_MID),
60 PM_LEX_STATE_FNAME = (1 << PM_LEX_STATE_BIT_FNAME),
61 PM_LEX_STATE_DOT = (1 << PM_LEX_STATE_BIT_DOT),
62 PM_LEX_STATE_CLASS = (1 << PM_LEX_STATE_BIT_CLASS),
63 PM_LEX_STATE_LABEL = (1 << PM_LEX_STATE_BIT_LABEL),
64 PM_LEX_STATE_LABELED = (1 << PM_LEX_STATE_BIT_LABELED),
65 PM_LEX_STATE_FITEM = (1 << PM_LEX_STATE_BIT_FITEM),
66 PM_LEX_STATE_BEG_ANY = PM_LEX_STATE_BEG | PM_LEX_STATE_MID | PM_LEX_STATE_CLASS,
67 PM_LEX_STATE_ARG_ANY = PM_LEX_STATE_ARG | PM_LEX_STATE_CMDARG,
68 PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
70
74typedef enum {
75 PM_HEREDOC_QUOTE_NONE,
76 PM_HEREDOC_QUOTE_SINGLE = '\'',
77 PM_HEREDOC_QUOTE_DOUBLE = '"',
78 PM_HEREDOC_QUOTE_BACKTICK = '`',
80
84typedef enum {
85 PM_HEREDOC_INDENT_NONE,
86 PM_HEREDOC_INDENT_DASH,
87 PM_HEREDOC_INDENT_TILDE,
89
97typedef struct pm_lex_mode {
99 enum {
102
108
114
117
123
129
136
138 union {
139 struct {
141 size_t nesting;
142
145
150 uint8_t incrementor;
151
153 uint8_t terminator;
154
159 uint8_t breakpoints[11];
160 } list;
161
162 struct {
166 size_t nesting;
167
172 uint8_t incrementor;
173
175 uint8_t terminator;
176
181 uint8_t breakpoints[6];
182 } regexp;
183
184 struct {
186 size_t nesting;
187
189 bool interpolation;
190
197
202 uint8_t incrementor;
203
208 uint8_t terminator;
209
214 uint8_t breakpoints[6];
215 } string;
216
217 struct {
219 const uint8_t *ident_start;
220
223
226
229
234 const uint8_t *next_start;
235
242 } heredoc;
243 } as;
244
248
254#define PM_LEX_STACK_SIZE 4
255
259typedef struct pm_parser pm_parser_t;
260
366
375
377typedef enum {
378 PM_COMMENT_INLINE,
379 PM_COMMENT_EMBDOC
381
397
404typedef struct {
407
409 const uint8_t *key_start;
410
412 const uint8_t *value_start;
413
415 uint32_t key_length;
416
418 uint32_t value_length;
420
426
435typedef struct {
441 void *data;
442
447 void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
449
482
489struct pm_parser {
492
495
501
507
513
519
521 struct {
524
527
529 size_t index;
531
533 const uint8_t *start;
534
536 const uint8_t *end;
537
540
543
549 const uint8_t *next_start;
550
557 const uint8_t *heredoc_end;
558
561
564
567
570
573
576
579
585
592
598
604
610
616
619
627
633
638 int32_t start_line;
639
670
673
676
683
689
692
695
701
707
714};
715
716#endif
The abstract syntax tree.
uint16_t pm_node_flags_t
These are the flags embedded in the node struct.
Definition ast.h:1029
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:84
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:254
struct pm_lex_mode pm_lex_mode_t
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_bit_t
This enum provides various bits that represent different kinds of states that the lexer can track.
Definition parser.h:31
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:51
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:74
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:425
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:266
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:298
@ PM_CONTEXT_ENSURE_DEF
an ensure statement within a method definition
Definition parser.h:307
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:295
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:313
@ PM_CONTEXT_CASE_WHEN
a case when statement
Definition parser.h:277
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:328
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:289
@ PM_CONTEXT_CASE_IN
a case in statement
Definition parser.h:280
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:355
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:358
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:334
@ PM_CONTEXT_IF
an if statement
Definition parser.h:316
@ PM_CONTEXT_RESCUE_DEF
a rescue statement within a method definition
Definition parser.h:352
@ PM_CONTEXT_RESCUE
a rescue statement
Definition parser.h:349
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:319
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:331
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:271
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:340
@ PM_CONTEXT_RESCUE_ELSE
a rescue else statement
Definition parser.h:343
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:361
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:310
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:337
@ PM_CONTEXT_RESCUE_ELSE_DEF
a rescue else statement within a method definition
Definition parser.h:346
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:292
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:283
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:325
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:322
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:268
@ PM_CONTEXT_ENSURE
an ensure statement
Definition parser.h:304
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:301
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:286
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:274
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:364
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:377
A data structure that stores a set of strings.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
An abstract linked list.
A list of byte offsets of newlines in a string.
A stack of boolean values.
uint32_t pm_state_stack_t
A struct that represents a stack of boolean values.
A generic string type that can have various ownership semantics.
Macro definitions used throughout the prism library.
The encoding interface and implementations used by the parser.
C99 shim for <stdbool.h>
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:387
pm_list_node_t node
The embedded base node.
Definition parser.h:389
pm_comment_type_t type
The type of comment that we've found.
Definition parser.h:395
pm_location_t location
The location of the comment in the source.
Definition parser.h:392
A list of constant IDs.
The overall constant pool, which stores constants found while parsing.
This is a node in a linked list of contexts.
Definition parser.h:368
pm_context_t context
The context that this node represents.
Definition parser.h:370
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:373
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition encoding.h:23
When you are lexing through a file, the lexer needs all of the information that the parser additional...
Definition parser.h:435
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:441
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition parser.h:97
enum pm_lex_mode::@91 mode
The type of this lex mode.
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:153
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:219
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:141
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:144
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:228
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:150
@ PM_LEX_LIST
This state is used when we are lexing a list of tokens, as in a %w word list literal or a %i symbol lis...
Definition parser.h:122
@ PM_LEX_EMBVAR
This state is used when we're lexing a variable that is embedded directly inside of a string with the...
Definition parser.h:113
@ PM_LEX_REGEXP
This state is used when a regular expression has been begun and we are looking for the terminator.
Definition parser.h:128
@ PM_LEX_DEFAULT
This state is used when any given token is being lexed.
Definition parser.h:101
@ PM_LEX_HEREDOC
This state is used when you are inside the content of a heredoc.
Definition parser.h:116
@ PM_LEX_EMBEXPR
This state is used when we're lexing as normal but inside an embedded expression of a string.
Definition parser.h:107
@ PM_LEX_STRING
This state is used when we are lexing a string or a string-like token, as in string content with eith...
Definition parser.h:134
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:246
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:225
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:196
union pm_lex_mode::@92 as
The data associated with this type of lex mode.
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:234
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:222
size_t common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:241
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:159
This struct represents an abstract linked list that provides common functionality.
Definition pm_list.h:46
This represents the overall linked list.
Definition pm_list.h:55
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:543
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:404
uint32_t key_length
The length of the key in the source.
Definition parser.h:415
pm_list_node_t node
The embedded base node.
Definition parser.h:406
const uint8_t * key_start
A pointer to the start of the key in the source.
Definition parser.h:409
const uint8_t * value_start
A pointer to the start of the value in the source.
Definition parser.h:412
uint32_t value_length
The length of the value in the source.
Definition parser.h:418
A list of offsets of newlines in a string.
This struct represents the overall parser.
Definition parser.h:489
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:669
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:491
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:584
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition parser.h:688
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:536
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:675
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:626
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:615
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:691
bool suppress_warnings
Whether or not we should emit warnings.
Definition parser.h:713
pm_state_stack_t do_loop_stack
The stack used to determine if a do keyword belongs to the predicate of a while, until,...
Definition parser.h:512
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:549
pm_string_t filepath_string
This is the path of the file being parsed.
Definition parser.h:609
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:563
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:500
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:603
pm_token_t previous
The previous token we were considering.
Definition parser.h:539
struct pm_parser::@97 lex_modes
A stack of lex modes.
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:632
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:682
pm_location_t data_loc
The optional location of the __END__ keyword and its contents.
Definition parser.h:566
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:578
pm_constant_id_t current_param_name
The current parameter name id on parsing its default value.
Definition parser.h:694
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:533
pm_state_stack_t accepts_block_stack
The stack used to determine if a do keyword belongs to the beginning of a block.
Definition parser.h:518
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:494
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:572
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:597
pm_token_t current
The current token we're considering.
Definition parser.h:542
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:526
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:569
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:557
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:506
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:591
int32_t start_line
The line number at the start of the parse.
Definition parser.h:638
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:523
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:560
size_t index
The current index into the lexer mode stack.
Definition parser.h:529
pm_scope_t * current_scope
The current local scope.
Definition parser.h:575
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:672
bool frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true value.
Definition parser.h:706
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:618
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:700
This struct represents a node in a linked list of scopes.
Definition parser.h:454
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:459
pm_constant_id_list_t locals
The IDs of the locals in the given scope.
Definition parser.h:456
bool explicit_params
A boolean indicating whether or not this scope has explicit parameters.
Definition parser.h:472
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:465
uint8_t numbered_parameters
An integer indicating the number of numbered parameters on this scope.
Definition parser.h:480
A generic string type that can have various ownership semantics.
Definition pm_string.h:30
This struct represents a token in the Ruby source.
Definition ast.h:528