Edit on GitHub

parsetypes

This package provides tools for parsing serialised data to recover their original underlying types.

The TypeParser class provides configurable type inference and parsing. This can be initialised with different settings to, for example:

  • treat inf as either a float or a normal string
  • give exact Decimal values instead of floats
  • detect inline lists
 1"""
 2	This package provides tools for parsing serialised data to recover their original underlying types.
 3
 4	The `TypeParser` class provides configurable type inference and parsing. This can be initialised with different settings to, for example:
 5	- treat `inf` as either a float or a normal string
 6	- give exact Decimal values instead of floats
 7	- detect inline lists
 8"""
 9
10
11__version__ = "0.2.1"
12
13from ._common import AnyScalar, AnyScalarType, AnyValue, AnyValueType, GenericValue, Nullable
14from ._parser import TypeParser
15from ._reduce_types import reduce_types
16
17__all__ = ('TypeParser', 'reduce_types')
class TypeParser:
 60class TypeParser:
 61	"""
 62		A parser that can be used to infer the underlying types of data serialised as strings, and to convert them into their original underlying types.
 63
 64		Instances of this class can be configured with different settings for the parser and inferrer. See the constructor for more details about the available options.
 65	"""
 66
	def __init__(self,
		*,
		trim: bool=True,
		use_decimal: bool=False,
		list_delimiter: Optional[str]=None,
		none_values: Iterable[str]=("",),
		none_case_sensitive: bool=False,
		true_values: Iterable[str]=("true",),
		false_values: Iterable[str]=("false",),
		bool_case_sensitive: bool=False,
		int_case_sensitive: bool=False,
		inf_values: Iterable[str]=(),
		nan_values: Iterable[str]=(),
		float_case_sensitive: bool=False,
		case_sensitive: Optional[bool]=None,
	):
		"""
			Initialise a new parser

			Parameters
			----------
			`trim`
			: whether leading and trailing whitespace should be stripped from strings

			`use_decimal`
			: whether non-integer numeric values should be inferred as Decimal (exact values) instead of float (non-exact values). Note that this only applies to methods that attempt to infer type (`infer()` and `infer_*()`), and does not affect methods where the type is explicitly specified (`is_float()`, `is_decimal()`, `parse_float()`, `parse_decimal()`).

			`list_delimiter`
			: the delimiter used for identifying lists and for separating list items. If set to None, the parser will not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead.

			`none_values`
			: strings that represent the value None

			`none_case_sensitive`
			: whether matches against `none_values` should be made in a case-sensitive manner

			`true_values`
			: strings that represent the bool value True

			`false_values`
			: strings that represent the bool value False

			`bool_case_sensitive`
			: whether matches against `true_values` and `false_values` should be made in a case-sensitive manner

			`int_case_sensitive`
			: whether checks for int should be done in a case-sensitive manner. This usually only applies to values given in scientific notation, where the mantissa and exponent usually are separated by `e`.

			`inf_values`
			: strings that represent the float or Decimal value of infinity. Each of the strings can be prepended with a negative sign to represent negative infinity also.

			`nan_values`
			: strings that represent a float or Decimal that is NaN (not a number)

			`float_case_sensitive`
			: whether checks for float should be done in a case-sensitive manner. This applies to matches against `inf_values` and `nan_values`, as well as to values given in scientific notation, where the mantissa and exponent are usually separated by `e`.

			`case_sensitive`
			: whether all matches should be made in a case-sensitive manner. If not None, sets all of `none_case_sensitive`, `bool_case_sensitive`, `int_case_sensitive`, `float_case_sensitive` to the same value, ignoring any individual settings.

			Raises
			------
			`ValueError` if any of the options would lead to ambiguities during parsing
		"""

		# A global setting overrides every individual case-sensitivity flag
		if case_sensitive is not None:
			none_case_sensitive = case_sensitive
			int_case_sensitive = case_sensitive
			bool_case_sensitive = case_sensitive
			float_case_sensitive = case_sensitive

		# Special values are normalised the same way as incoming strings will be
		# (trimmed, then lower-cased when matching is case-insensitive) so that
		# later membership tests are plain set lookups.
		self.trim = trim
		if self.trim:
			none_values = (value.strip() for value in none_values)
			true_values = (value.strip() for value in true_values)
			false_values = (value.strip() for value in false_values)
			inf_values = (value.strip() for value in inf_values)
			nan_values = (value.strip() for value in nan_values)

		self.use_decimal = use_decimal
		self.list_delimiter = list_delimiter

		self.none_case_sensitive = none_case_sensitive
		if not self.none_case_sensitive:
			none_values = (value.lower() for value in none_values)
		self.none_values = set(none_values)

		self.bool_case_sensitive = bool_case_sensitive
		if not self.bool_case_sensitive:
			true_values = (value.lower() for value in true_values)
			false_values = (value.lower() for value in false_values)
		self.true_values = set(true_values)
		self.false_values = set(false_values)

		self.int_case_sensitive = int_case_sensitive

		self.float_case_sensitive = float_case_sensitive
		if not self.float_case_sensitive:
			inf_values = (value.lower() for value in inf_values)
			nan_values = (value.lower() for value in nan_values)
		self.inf_values = set(inf_values)
		self.nan_values = set(nan_values)

		# Unconfigurable default values
		self._negative_char = "-"
		self._negative_chars = {self._negative_char, "−"}
		self._sign_chars = self._negative_chars | {"+"}
		self._digit_chars = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}  # Because isdigit("²") == True, but int("²") is invalid
		self._digit_separators = {"_"}
		self._scientific_char = "e"
		self._float_separator = "."
		self._reserved_chars = self._sign_chars | self._digit_chars | self._digit_separators | {self._scientific_char} | {self._float_separator}

		# Check if any special values conflict: each special value must be
		# recognised as exactly the kind it was configured as, and nothing else.
		for name, special_values in [
			(_SpecialValue.LIST, [self.list_delimiter] if self.list_delimiter is not None else []),
			(_SpecialValue.NONE, self.none_values),
			(_SpecialValue.TRUE, self.true_values),
			(_SpecialValue.FALSE, self.false_values),
			(_SpecialValue.INF, self.inf_values),
			(_SpecialValue.NAN, self.nan_values),
		]:
			for special_value in special_values:
				if special_value in self._reserved_chars:
					raise ValueError(f"cannot use reserved char as {name.value}: {special_value}")

				if name != _SpecialValue.NONE and self.is_none(special_value):
					raise ValueError(f"cannot use None value as {name.value}: {special_value}")

				if (
					(name == _SpecialValue.TRUE and self.parse_bool(special_value) != True) or
					(name == _SpecialValue.FALSE and self.parse_bool(special_value) != False) or
					(name != _SpecialValue.TRUE and name != _SpecialValue.FALSE and self.is_bool(special_value))
				):
					raise ValueError(f"cannot use bool value as {name.value}: {special_value}")

				if self.is_int(special_value):
					raise ValueError(f"cannot use int value as {name.value}: {special_value}")

				if self.use_decimal:
					if (
						(name == _SpecialValue.INF and self.parse_decimal(special_value) != Decimal(math.inf)) or
						(name == _SpecialValue.NAN and not self.parse_decimal(special_value).is_nan()) or
						(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(special_value))
					):
						raise ValueError(f"cannot use Decimal value as {name.value}: {special_value}")
				else:
					if (
						(name == _SpecialValue.INF and self.parse_float(special_value) != math.inf) or
						# NaN never compares equal to itself, so test with isnan() rather than by identity
						(name == _SpecialValue.NAN and not math.isnan(self.parse_float(special_value))) or
						(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(special_value))
					):
						raise ValueError(f"cannot use float value as {name.value}: {special_value}")
221
222
223	def is_none(self, value: str) -> bool:
224		"""
225			Check if a string represents the value None
226
227			Only strings that match the values in `self.none_values` will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on `self.none_case_sensitive`, which is False by default.
228
229			Parameters
230			----------
231			`value`
232			: string to be checked
233
234			Returns
235			-------
236			whether it is None
237
238			Examples
239			--------
240			```python
241			parser = TypeParser()
242			parser.parse_bool("")     # True
243			parser.parse_bool("abc")  # False
244			```
245		"""
246		if self.trim:
247			value = value.strip()
248		if not self.bool_case_sensitive:
249			value = value.lower()
250
251		if value in self.none_values:
252			return True
253		else:
254			return False
255
256
257	def is_bool(self, value: str) -> bool:
258		"""
259			Check if a string represents a bool
260
261			Only strings that match the values in `self.true_values` and `self.false_values` will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on `self.bool_case_sensitive`, which is False by default.
262
263			Parameters
264			----------
265			`value`
266			: string to be checked
267
268			Returns
269			-------
270			whether it is a bool
271
272			Examples
273			--------
274			```python
275			parser = TypeParser()
276			parser.is_bool("true")  # True
277			parser.is_bool("")      # True
278			parser.is_bool("abc")   # False
279			```
280		"""
281		if self.trim:
282			value = value.strip()
283
284		if not self.bool_case_sensitive:
285			value = value.lower()
286		if value in self.true_values:
287			return True
288		if value in self.false_values:
289			return True
290
291		return False
292
293
294	def is_int(self, value: str, *, allow_sign: bool=True, allow_negative: bool=True, allow_scientific: bool=True) -> bool:
295		"""
296			Check if a string represents an int
297
298			Parameters
299			----------
300			`value`
301			: string to be checked
302
303			`allow_negative`
304			: whether to accept negative values
305
306			`allow_sign`
307			: whether to accept signed values. If False, it implies that `allow_negative` is False also.
308
309			`allow_scientific`
310			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note <var>M</var> *must* be an integer and <var>X</var> *must* be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.
311
312			Returns
313			-------
314			whether it is an int
315
316			Examples
317			--------
318			```python
319			parser = TypeParser()
320			parser.is_int("0")    # True
321			parser.is_int("-1")   # True
322			parser.is_int("abc")  # False
323			parser.is_int("")     # False
324			```
325		"""
326		if self.trim:
327			value = value.strip()
328
329		if len(value) == 0:
330			return False
331
332		if allow_scientific:
333			value, exp = _decompose_string_pair(value, self._scientific_char, self.int_case_sensitive)
334			if exp is not None:
335				return self.is_int(
336					value, allow_sign=True, allow_negative=allow_negative, allow_scientific=False
337				) and self.is_int(
338					exp, allow_sign=True, allow_negative=False, allow_scientific=False
339				)
340
341		if value[0] in self._sign_chars:
342			if len(value) == 1:
343				return False
344			if not allow_sign:
345				return False
346			if not allow_negative and value[0] in self._negative_chars:
347				return False
348			value = value[1:]
349		if value[0] in self._digit_separators or value[-1] in self._digit_separators:
350			return False
351
352		prev_separated = False
353		for c in value:
354			if c in self._digit_separators:
355				if prev_separated:
356					return False
357				prev_separated = True
358			else:
359				prev_separated = False
360				if c not in self._digit_chars:
361					return False
362		return True
363
364
365	def is_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
366		"""
367			Check if a string represents a float (or equivalently, a Decimal)
368
369			This function will also return True if the string represents an int.
370
371			Alias: `is_decimal()`
372
373			Parameters
374			----------
375			`value`
376			: string to be checked
377
378			`allow_scientific`
379			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
380
381			`allow_inf`
382			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
383
384			`allow_nan`
385			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpeted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
386
387			Returns
388			-------
389			whether it is a float or Decimal
390
391			Examples
392			--------
393			```python
394			parser = TypeParser()
395			parser.is_float("1.")       # True
396			parser.is_float("12.3e-2")  # True
397			parser.is_float("abc")      # False
398			parser.is_float("")         # False
399			```
400		"""
401		if self.trim:
402			value = value.strip()
403
404		if len(value) > 0 and value[0] in self._sign_chars:
405			value = value[1:]
406
407		if self.float_case_sensitive:
408			special_value = value
409		else:
410			special_value = value.lower()
411		if allow_inf and special_value in self.inf_values:
412			return True
413		if allow_nan and special_value in self.nan_values:
414			return True
415
416		if len(value) == 0:
417			return False
418
419		if allow_scientific:
420			value, exp = _decompose_string_pair(value, self._scientific_char, self.float_case_sensitive)
421			if exp is not None:
422				return self.is_float(value, allow_scientific=False, allow_inf=False, allow_nan=False) and self.is_int(exp, allow_sign=True, allow_negative=True, allow_scientific=False)
423
424		value, frac = _decompose_string_pair(value, self._float_separator, self.float_case_sensitive)
425		if frac is not None:
426			if value == "" and frac == "":
427				return False
428			return (
429				self.is_int(value, allow_sign=True, allow_negative=False, allow_scientific=False) or value == ""
430			) and (
431				self.is_int(frac, allow_sign=False, allow_negative=False, allow_scientific=False) or frac == ""
432			)
433
434		return self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=False)
435
436
437	def is_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
438		"""
439			Alias of `is_float()`
440		"""
441		return self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan)
442
443
444	def parse_none(self, value: str) -> None:
445		"""
446			Parse a string and return it as the value None if possible
447
448			Only strings that match the values in `self.none_values` will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on `self.none_case_sensitive`, which is False by default.
449
450			Parameters
451			----------
452			`value`
453			: string to be parsed
454
455			Returns
456			-------
457			parsed None value
458
459			Raises
460			------
461			`ValueError` if `value` cannot be parsed
462
463			Examples
464			--------
465			```python
466			parser = TypeParser()
467			parser.parse_bool("")     # None
468			parser.parse_bool("abc")  # raises ValueError
469			```
470		"""
471		if self.is_none(value):
472			return None
473		else:
474			raise ValueError(f"not a none value: {value}")
475
476
477	def parse_bool(self, value: str) -> bool:
478		"""
479			Parse a string and return it as a bool if possible
480
481			Only strings that match the values in `self.true_values` and `self.false_values` will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on `self.bool_case_sensitive`, which is False by default.
482
483			Parameters
484			----------
485			`value`
486			: string to be parsed
487
488			Returns
489			-------
490			parsed bool value
491
492			Raises
493			------
494			`ValueError` if `value` cannot be parsed
495
496			Examples
497			--------
498			```python
499			parser = TypeParser()
500			parser.parse_bool("true")   # True
501			parser.parse_bool("FALSE")  # False
502			```
503		"""
504		if self.trim:
505			value = value.strip()
506
507		if self.bool_case_sensitive:
508			special_value = value
509		else:
510			special_value = value.lower()
511		if special_value in self.true_values:
512			return True
513		if special_value in self.false_values:
514			return False
515
516		raise ValueError(f"not a boolean: {value}")
517
518
519	def parse_int(self, value: str, *, allow_scientific: bool=True) -> int:
520		"""
521			Parse a string and return it as an int if possible
522
523			If the string represents a bool, it will be converted to `1` for True and `0` for False.
524
525			Parameters
526			----------
527			`value`
528			: string to be parsed
529
530			`allow_scientific`
531			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note <var>M</var> *must* be an integer and <var>X</var> *must* be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.
532
533			Returns
534			-------
535			parsed int value
536
537			Raises
538			------
539			`ValueError` if `value` cannot be parsed
540
541			Examples
542			--------
543			```python
544			parser = TypeParser()
545			parser.parse_int("0")    # 0
546			parser.parse_int("-1")   # -1
547			parser.parse_int("2e3")  # 2000
548			```
549		"""
550		if self.trim:
551			value = value.strip()
552
553		if self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=allow_scientific):
554			if allow_scientific:
555				value, exp = _decompose_string_pair(value, self._scientific_char, self.int_case_sensitive)
556				if exp is not None:
557					if value[0] in (self._negative_chars - {self._negative_char}):
558						value = self._negative_char + value[1:]
559					return int(value) * (10 ** int(exp))
560
561			if value[0] in (self._negative_chars - {self._negative_char}):
562				value = self._negative_char + value[1:]
563			return int(value)
564
565		elif self.is_bool(value):
566			return int(self.parse_bool(value))
567		else:
568			raise ValueError(f"not an integer: {value}")
569
570
571	def _parse_floatlike(self,
572		value: str,
573		converter: Callable[[Union[str, bool]], _FloatLike],
574		inf_value: _FloatLike,
575		nan_value: _FloatLike,
576		*,
577		allow_scientific: bool=True,
578		allow_inf: bool=True,
579		allow_nan: bool=True
580	) -> _FloatLike:
581		if self.trim:
582			value = value.strip()
583		if self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan):
584			if self.float_case_sensitive:
585				special_value = value
586			else:
587				special_value = value.lower()
588			if allow_inf and special_value in self.inf_values:
589				return inf_value
590			if allow_nan and special_value in self.nan_values:
591				return nan_value
592
593			if len(value) > 0 and value[0] in self._sign_chars:
594				positive_part = value[1:]
595				if self.float_case_sensitive:
596					special_value = positive_part
597				else:
598					special_value = positive_part.lower()
599				if allow_inf and special_value in self.inf_values:
600					if value[0] in self._negative_chars:
601						return -1 * inf_value
602					else:
603						return inf_value
604				if allow_nan and special_value in self.nan_values:
605					return nan_value
606
607				if value[0] in self._negative_chars:
608					value = self._negative_char + positive_part
609			return converter(value)
610		elif self.is_bool(value):
611			return converter(self.parse_bool(value))
612		else:
613			raise ValueError(f"not a {_FloatLike.__name__}: {value}")
614
615
616	def parse_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> float:
617		"""
618			Parse a string and return it as a (non-exact) float if possible
619
620			If the string represents a bool, it will be converted to `1.` for True and `0.` for False. If the string represents an int, it will be converted to a float also.
621
622			Behaves analogously to `parse_decimal()`, except that that returns an exact Decimal instead.
623
624			Parameters
625			----------
626			`value`
627			: string to be parsed
628
629			`allow_scientific`
630			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
631
632			`allow_inf`
633			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
634
635			`allow_nan`
636			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpeted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
637
638			Returns
639			-------
640			parsed float value
641
642			Raises
643			------
644			`ValueError` if `value` cannot be parsed
645
646			Examples
647			--------
648			```python
649			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
650			parser.parse_float("1.")       # 1.
651			parser.parse_float("1.23e2")   # 123.
652			parser.parse_float("1.23e-2")  # 0.0123
653			parser.parse_float("inf")      # math.inf
654			```
655		"""
656		return self._parse_floatlike(value, float, math.inf, math.nan,
657			allow_scientific=allow_scientific,
658			allow_inf=allow_inf,
659			allow_nan=allow_nan,
660		)
661
662
663	def parse_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> Decimal:
664		"""
665			Parse a string and return it as an exact Decimal if possible
666
667			If the string represents a bool, it will be converted to `Decimal(1)` for True and `Decimal(0)` for False. If the string represents an int, it will be converted to a Decimal also.
668
669			Behaves analogously to `parse_float()`, except that that returns a non-exact float instead.
670
671			Parameters
672			----------
673			`value`
674			: string to be parsed
675
676			`allow_scientific`
677			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
678
679			`allow_inf`
680			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
681
682			`allow_nan`
683			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpeted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
684
685			Returns
686			-------
687			parsed Decimal value
688
689			Raises
690			------
691			`ValueError` if `value` cannot be parsed
692
693			Examples
694			--------
695			```python
696			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
697			parser.parse_decimal("1.")       # Decimal(1)
698			parser.parse_decimal("1.23e2")   # Decimal(123)
699			parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
700			parser.parse_decimal("inf")      # Decimal(math.inf)
701			```
702		"""
703		return self._parse_floatlike(value, Decimal, Decimal(math.inf), Decimal(math.nan),
704			allow_scientific=allow_scientific,
705			allow_inf=allow_inf,
706			allow_nan=allow_nan,
707		)
708
709
710	def infer(self, value: str) -> AnyValueType:
711		"""
712			Infer the underlying type of a string
713
714			Also check for inline lists if `self.list_delimiter` is not None.
715
716			Parameters
717			----------
718			`value`
719			: the string for which the type should be inferred
720
721			Returns
722			-------
723			inferred type
724
725			Examples
726			--------
727			```python
728			parser = TypeParser()
729			parser.infer("true")  # bool
730			parser.infer("2.0")   # float
731			parser.infer("abc")   # str
732			```
733		"""
734		if self.is_none(value):
735			return NoneType
736		if self.is_bool(value):
737			return bool
738		if self.is_int(value):
739			return int
740		if self.is_float(value):
741			if self.use_decimal:
742				return Decimal
743			else:
744				return float
745
746		if self.trim:
747			value = value.strip()
748
749		if self.list_delimiter is not None and self.list_delimiter in value:
750			subvalues = value.split(self.list_delimiter)
751			if self.trim:
752				subvalues = [subvalue.strip() for subvalue in subvalues]
753			reduced_type = reduce_types(self.infer(subvalue) for subvalue in subvalues)
754			reduced_type = cast(AnyContainedType, reduced_type)
755			r = list[reduced_type]
756			return r  # type: ignore
757
758		return GenericValue
759
760
761	def infer_series(self, values: Iterable[str]) -> AnyValueType:
762		"""
763			Infer the underlying common type of a series of strings
764
765			If the values in the series do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
766
767			Parameters
768			----------
769			`values`
770			: series of strings for which the type should be inferred
771
772			Returns
773			-------
774			inferred type
775
776			Examples
777			--------
778			```python
779			parser = TypeParser()
780			parser.infer_series(["1", "2", "3.4"])       # float
781			parser.infer_series(["true", "false", "2"])  # int
782			parser.infer_series(["1", "2.3", "abc"])     # str
783			```
784		"""
785		return reduce_types(self.infer(value) for value in values)
786
787
788	def infer_table(self, rows: Iterable[Sequence[str]]) -> list[AnyValueType]:
789		"""
790			Infer the underlying common type for each column of a table of strings
791
792			For each column, if the values do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
793
794			Note that the inferred types of every individual value must all be able to fit into memory at once.
795
796			Parameters
797			----------
798			`rows`
799			: table of strings for which the types should be inferred, in row-major order
800
801			Returns
802			-------
803			inferred types
804
805			Examples
806			--------
807			```python
808			parser = TypeParser()
809			parser.infer_table([
810				["1",   "true",  "1"],
811				["2",   "false", "2.3"],
812				["3.4", "2",     "abc"],
813			])
814			# [float, int, str]
815			```
816		"""
817		rows_iter = iter(rows)
818		first_row = next(rows_iter, None)
819		if first_row is None:
820			return []
821
822		num_cols = len(first_row)
823		if num_cols == 0:
824			return []
825
826		table = _TypeTable([[self.infer(value)] for value in first_row])
827		for row in rows_iter:
828			table.add_row([self.infer(value) for value in row])
829
830		return [reduce_types(col) for col in table.cols]
831
832
833	def _convert(self, value: str, t: AnyValueType) -> AnyValue:
834		base, type_args = _decompose_type(t)
835		if base == NoneType:
836			return None
837		elif base == bool:
838			return self.parse_bool(value)
839		elif base == int:
840			return self.parse_int(value)
841		elif base == Decimal:
842			return self.parse_decimal(value)
843		elif base == float:
844			return self.parse_float(value)
845		elif base == str:
846			return value
847		elif base == Nullable:
848			if self.is_none(value):
849				return None
850			else:
851				if type_args is not  None and len(type_args) == 1 and type_args[0] != str:
852					inner_type = type_args[0]
853					return self._convert(value, inner_type)
854				else:
855					return value
856		elif base == list:
857			subvalues = value.split(self.list_delimiter)
858			if self.trim:
859				subvalues = [subvalue.strip() for subvalue in subvalues]
860			if type_args is not None and len(type_args) == 1 and type_args[0] != str:
861				subtype = type_args[0]
862				return cast(AnyContained, [self._convert(subvalue, subtype) for subvalue in subvalues])
863			else:
864				return subvalues
865		else:
866			return value
867
868
869	def parse(self, value: str) -> AnyValue:
870		"""
871			Parse a string and convert it to its underlying type
872
873			Parameters
874			----------
875			`value`
876			: the string to be parsed
877
878			Returns
879			-------
880			converted value
881
882			Examples
883			--------
884			```python
885			parser = TypeParser()
886			parser.parse("true")  # True
887			parser.parse("2.0")   # 2.
888			parser.parse("abc")   # "abc"
889			```
890		"""
891		return self._convert(value, self.infer(value))
892
893
894	def parse_series(self, values: Iterable[str]) -> list[AnyValue]:
895		"""
896			Parse a series of strings and convert them to their underlying common type
897
898			If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
899
900			Parameters
901			----------
902			`values`
903			: series of strings to be parsed
904
905			Returns
906			-------
907			converted values
908
909			Examples
910			--------
911			```python
912			parser = TypeParser()
913			parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
914			parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
915			parser.parse_series(["true", "false", ""])  # [True, False, None]
916			parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
917			```
918		"""
919		inferred = self.infer_series(values)
920		return [self._convert(value, inferred) for value in values]
921
922
923	def parse_table(self, rows: Iterable[Sequence[str]]) -> list[list[AnyValue]]:
924		"""
925			Parse a table of strings and convert them to the underlying common type of each column
926
927			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
928
929			Note that the type inference requires that the inferred types of every individual value must all be able to fit into memory at once.
930
931			This is a function that computes the entire table and returns it all at once. The generator `iterate_table()` behaves analogously, except that it computes and yields each row one at a time.
932
933			Parameters
934			----------
935			`rows`
936			: table of strings to be parsed, in row-major order
937
938			`iterator`
939			: whether the parsed values should be yielded as an iterator. If False, which is the default, the entire table is computed and returned as a list of lists. If True, this function behaves as a generator, and the rows of the table are computed and yielded one at a time. However, note that even when set to True, the type inference requires that inferred type of each individual value must all be able to fit into memory at once.
940
941			Returns
942			-------
943			converted table of values, in row-major order
944
945			Examples
946			--------
947			```python
948			parser = TypeParser()
949			table = parser.parse_table([
950				["1", "5",   "true",  "1"],
951				["2", "6.7", "false", "2.3"],
952				["3", "8.0", "",      "abc"],
953			]):
954			assert table == [
955				[1, 5.,  True,  "1"],
956				[2, 6.7, False, "2.3"],
957				[3, 8.,  None,  "abc"],
958			]
959			```
960		"""
961		return [converted_row for converted_row in self.iterate_table(rows)]
962
963
964	def iterate_table(self, rows: Iterable[Sequence[str]]) -> Iterator[list[AnyValue]]:
965		"""
966			Parse a table of strings for the underlying common type of each column, then convert and yield each row
967
968			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
969
970			This is a generator that computes and yields each row one at a time. The function `parse_table()` behaves analogously, except that it computes the entire table and returns it as a list of lists. However, note that although this is a generator, the type inference still requires that the inferred types of every individual value must all be able to fit into memory at once.
971
972			Parameters
973			----------
974			`rows`
975			: table of strings to be parsed, in row-major order
976
977			Yields
978			-------
979			each row of converted table values
980
981			Examples
982			--------
983			```python
984			parser = TypeParser()
985			table = parser.iterate_table([
986				["1",   "true",  "1"],
987				["2",   "false", "2.3"],
988				["3.4", "2",     "abc"],
989			]):
990			assert next(table) == [1.,  1, "1"]
991			assert next(table) == [2.,  0, "2.3"]
992			assert next(table) == [3.4, 2, "abc"]
993			```
994		"""
995		inferred_types = self.infer_table(rows)
996
997		for row in rows:
998			yield [self._convert(value, inferred) for value, inferred in zip(row, inferred_types)]

A parser that can be used to infer the underlying types of data serialised as strings, and to convert them into their original underlying types.

Instances of this class can be configured with different settings for the parser and inferrer. See the constructor for more details about the available options.

TypeParser( *, trim: bool = True, use_decimal: bool = False, list_delimiter: Optional[str] = None, none_values: Iterable[str] = [''], none_case_sensitive: bool = False, true_values: Iterable[str] = ['true'], false_values: Iterable[str] = ['false'], bool_case_sensitive: bool = False, int_case_sensitive: bool = False, inf_values: Iterable[str] = [], nan_values: Iterable[str] = [], float_case_sensitive: bool = False, case_sensitive: Optional[bool] = None)
	def __init__(self,
		*,
		trim: bool=True,
		use_decimal: bool=False,
		list_delimiter: Optional[str]=None,
		none_values: Iterable[str]=[""],
		none_case_sensitive: bool=False,
		true_values: Iterable[str]=["true"],
		false_values: Iterable[str]=["false"],
		bool_case_sensitive: bool=False,
		int_case_sensitive: bool=False,
		inf_values: Iterable[str]=[],
		nan_values: Iterable[str]=[],
		float_case_sensitive: bool=False,
		case_sensitive: Optional[bool]=None,
	):
		"""
			Initialise a new parser

			Parameters
			----------
			`trim`
			: whether leading and trailing whitespace should be stripped from strings

			`use_decimal`
			: whether non-integer numeric values should be inferred as Decimal (exact values) instead of float (non-exact values). Note that this only applies to methods that attempt to infer type (`infer()` and `infer_*()`), and does not affect methods where the type is explicitly specified (`is_float()`, `is_decimal()`, `parse_float()`, `parse_decimal()`).

			`list_delimiter`
			: the delimiter used for identifying lists and for separating list items. If set to None, the parser will not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead.

			`none_values`
			: list of strings that represent the value None

			`none_case_sensitive`
			: whether matches against `none_values` should be made in a case-sensitive manner

			`true_values`
			: list of strings that represent the bool value True

			`false_values`
			: list of strings that represent the bool value False

			`bool_case_sensitive`
			: whether matches against `true_values` and `false_values` should be made in a case-sensitive manner

			`int_case_sensitive`
			: whether checks for int should be done in a case-sensitive manner. This usually only applies to values given in scientific notation, where the mantissa and exponent are usually separated by `e`.

			`inf_values`
			: list of strings that represent the float or Decimal value of infinity. Each of the strings can be prepended with a negative sign to represent negative infinity also.

			`nan_values`
			: list of strings that represent a float or Decimal that is NaN (not a number)

			`float_case_sensitive`
			: whether checks for float should be done in a case-sensitive manner. This applies to matches against `inf_values` and `nan_values`, as well as to values given in scientific notation, where the mantissa and exponent are usually separated by `e`.

			`case_sensitive`
			: whether all matches should be made in a case-sensitive manner. Sets all of `none_case_sensitive`, `bool_case_sensitive`, `int_case_sensitive`, `float_case_sensitive` to the same value, ignoring any individual settings.

			Raises
			------
			`ValueError` if any of the options would lead to ambiguities during parsing
		"""

		# The global switch, when given, overrides every per-type flag
		if case_sensitive is not None:
			none_case_sensitive = case_sensitive
			int_case_sensitive = case_sensitive
			bool_case_sensitive = case_sensitive
			float_case_sensitive = case_sensitive

		# Special values are normalised the same way that inputs are normalised at
		# match time (stripped, then lower-cased when case-insensitive), so that plain
		# set membership can be used for matching
		self.trim = trim
		if self.trim:
			none_values = (value.strip() for value in none_values)
			true_values = (value.strip() for value in true_values)
			false_values = (value.strip() for value in false_values)
			inf_values = (value.strip() for value in inf_values)
			nan_values = (value.strip() for value in nan_values)

		self.use_decimal = use_decimal
		self.list_delimiter = list_delimiter

		self.none_case_sensitive = none_case_sensitive
		if not self.none_case_sensitive:
			none_values = (value.lower() for value in none_values)
		self.none_values = set(none_values)

		self.bool_case_sensitive = bool_case_sensitive
		if not self.bool_case_sensitive:
			true_values = (value.lower() for value in true_values)
			false_values = (value.lower() for value in false_values)
		self.true_values = set(true_values)
		self.false_values = set(false_values)

		self.int_case_sensitive = int_case_sensitive

		self.float_case_sensitive = float_case_sensitive
		if not self.float_case_sensitive:
			inf_values = (value.lower() for value in inf_values)
			nan_values = (value.lower() for value in nan_values)
		self.inf_values = set(inf_values)
		self.nan_values = set(nan_values)

		# Unconfigurable default values
		self._negative_char = "-"
		self._negative_chars = {self._negative_char, "−"}
		self._sign_chars = self._negative_chars | {"+"}
		self._digit_chars = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}  # Because isdigit("²") == True, but int("²") is invalid
		self._digit_separators = {"_"}
		self._scientific_char = "e"
		self._float_separator = "."
		self._reserved_chars = self._sign_chars | self._digit_chars | self._digit_separators | {self._scientific_char} | {self._float_separator}

		# Check if any special values conflict: each configured special value must be
		# recognised as its own kind only, and never as a reserved character or as a
		# value of any other kind
		for name, special_values in [
			(_SpecialValue.LIST, [self.list_delimiter] if self.list_delimiter is not None else []),
			(_SpecialValue.NONE, self.none_values),
			(_SpecialValue.TRUE, self.true_values),
			(_SpecialValue.FALSE, self.false_values),
			(_SpecialValue.INF, self.inf_values),
			(_SpecialValue.NAN, self.nan_values),
		]:
			for special_value in special_values:
				if special_value in self._reserved_chars:
					raise ValueError(f"cannot use reserved char as {name.value}: {special_value}")

				if name != _SpecialValue.NONE and self.is_none(special_value):
					raise ValueError(f"cannot use None value as {name.value}: {special_value}")

				if (
					(name == _SpecialValue.TRUE and self.parse_bool(special_value) != True) or
					(name == _SpecialValue.FALSE and self.parse_bool(special_value) != False) or
					(name != _SpecialValue.TRUE and name != _SpecialValue.FALSE and self.is_bool(special_value))
				):
					raise ValueError(f"cannot use bool value as {name.value}: {special_value}")

				if self.is_int(special_value):
					raise ValueError(f"cannot use int value as {name.value}: {special_value}")

				if self.use_decimal:
					if (
						(name == _SpecialValue.INF and self.parse_decimal(special_value) != Decimal(math.inf)) or
						(name == _SpecialValue.NAN and not self.parse_decimal(special_value).is_nan()) or
						(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(special_value))
					):
						raise ValueError(f"cannot use Decimal value as {name.value}: {special_value}")
				else:
					if (
						(name == _SpecialValue.INF and self.parse_float(special_value) != math.inf) or
						# NaN must be tested with isnan(): NaN never compares equal to anything, and an
						# identity test against math.nan fails for any other NaN object (e.g. float("nan"))
						(name == _SpecialValue.NAN and not math.isnan(self.parse_float(special_value))) or
						(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(special_value))
					):
						raise ValueError(f"cannot use float value as {name.value}: {special_value}")

Initialise a new parser

Parameters

trim : whether leading and trailing whitespace should be stripped from strings

use_decimal : whether non-integer numeric values should be inferred as Decimal (exact values) instead of float (non-exact values). Note that this only applies to methods that attempt to infer type (infer() and infer_*()), and does not affect methods where the type is explicitly specified (is_float(), is_decimal(), parse_float(), parse_decimal()).

list_delimiter : the delimiter used for identifying lists and for separating list items. If set to None, the parser will not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead.

none_values : list of strings that represent the value None

none_case_sensitive : whether matches against none_values should be made in a case-sensitive manner

true_values : list of strings that represent the bool value True

false_values : list of strings that represent the bool value False

bool_case_sensitive : whether matches against true_values and false_values should be made in a case-sensitive manner

int_case_sensitive : whether checks for int should be done in a case-sensitive manner. This usually only applies to values given in scientific notation, where the mantissa and exponent usually are separated by e.

inf_values : list of strings that represent the float or Decimal value of infinity. Each of the strings can be prepended with a negative sign to represent negative infinity also.

nan_values : list of strings that represent a float or Decimal that is NaN (not a number)

float_case_sensitive : whether checks for float should be done in a case-sensitive manner. This applies to matches against inf_values and nan_values, as well as to values given in scientific notation, where the mantissa and exponent are usually separated by e.

case_sensitive : whether all matches should be made in a case-sensitive manner. Sets all of none_case_sensitive, bool_case_sensitive, int_case_sensitive, float_case_sensitive to the same value, ignoring any individual settings.

Raises

ValueError if any of the options would lead to ambiguities during parsing

def is_none(self, value: str) -> bool:
223	def is_none(self, value: str) -> bool:
224		"""
225			Check if a string represents the value None
226
227			Only strings that match the values in `self.none_values` will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on `self.none_case_sensitive`, which is False by default.
228
229			Parameters
230			----------
231			`value`
232			: string to be checked
233
234			Returns
235			-------
236			whether it is None
237
238			Examples
239			--------
240			```python
241			parser = TypeParser()
242			parser.parse_bool("")     # True
243			parser.parse_bool("abc")  # False
244			```
245		"""
246		if self.trim:
247			value = value.strip()
248		if not self.bool_case_sensitive:
249			value = value.lower()
250
251		if value in self.none_values:
252			return True
253		else:
254			return False

Check if a string represents the value None

Only strings that match the values in self.none_values will be interpreted as None. The default accepted values are [""], i.e. an empty string. The case sensitivity of this matching depends on self.none_case_sensitive, which is False by default.

Parameters

value : string to be checked

Returns

whether it is None

Examples

parser = TypeParser()
parser.is_none("")     # True
parser.is_none("abc")  # False
def is_bool(self, value: str) -> bool:
257	def is_bool(self, value: str) -> bool:
258		"""
259			Check if a string represents a bool
260
261			Only strings that match the values in `self.true_values` and `self.false_values` will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on `self.bool_case_sensitive`, which is False by default.
262
263			Parameters
264			----------
265			`value`
266			: string to be checked
267
268			Returns
269			-------
270			whether it is a bool
271
272			Examples
273			--------
274			```python
275			parser = TypeParser()
276			parser.is_bool("true")  # True
277			parser.is_bool("")      # True
278			parser.is_bool("abc")   # False
279			```
280		"""
281		if self.trim:
282			value = value.strip()
283
284		if not self.bool_case_sensitive:
285			value = value.lower()
286		if value in self.true_values:
287			return True
288		if value in self.false_values:
289			return True
290
291		return False

Check if a string represents a bool

Only strings that match the values in self.true_values and self.false_values will be interpreted as booleans. The default accepted values are ["true"] and ["false"] respectively. The case sensitivity of this matching depends on self.bool_case_sensitive, which is False by default.

Parameters

value : string to be checked

Returns

whether it is a bool

Examples

parser = TypeParser()
parser.is_bool("true")  # True
parser.is_bool("")      # False
parser.is_bool("abc")   # False
def is_int( self, value: str, *, allow_sign: bool = True, allow_negative: bool = True, allow_scientific: bool = True) -> bool:
294	def is_int(self, value: str, *, allow_sign: bool=True, allow_negative: bool=True, allow_scientific: bool=True) -> bool:
295		"""
296			Check if a string represents an int
297
298			Parameters
299			----------
300			`value`
301			: string to be checked
302
303			`allow_negative`
304			: whether to accept negative values
305
306			`allow_sign`
307			: whether to accept signed values. If False, it implies that `allow_negative` is False also.
308
309			`allow_scientific`
310			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note <var>M</var> *must* be an integer and <var>X</var> *must* be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.
311
312			Returns
313			-------
314			whether it is an int
315
316			Examples
317			--------
318			```python
319			parser = TypeParser()
320			parser.is_int("0")    # True
321			parser.is_int("-1")   # True
322			parser.is_int("abc")  # False
323			parser.is_int("")     # False
324			```
325		"""
326		if self.trim:
327			value = value.strip()
328
329		if len(value) == 0:
330			return False
331
332		if allow_scientific:
333			value, exp = _decompose_string_pair(value, self._scientific_char, self.int_case_sensitive)
334			if exp is not None:
335				return self.is_int(
336					value, allow_sign=True, allow_negative=allow_negative, allow_scientific=False
337				) and self.is_int(
338					exp, allow_sign=True, allow_negative=False, allow_scientific=False
339				)
340
341		if value[0] in self._sign_chars:
342			if len(value) == 1:
343				return False
344			if not allow_sign:
345				return False
346			if not allow_negative and value[0] in self._negative_chars:
347				return False
348			value = value[1:]
349		if value[0] in self._digit_separators or value[-1] in self._digit_separators:
350			return False
351
352		prev_separated = False
353		for c in value:
354			if c in self._digit_separators:
355				if prev_separated:
356					return False
357				prev_separated = True
358			else:
359				prev_separated = False
360				if c not in self._digit_chars:
361					return False
362		return True

Check if a string represents an int

Parameters

value : string to be checked

allow_negative : whether to accept negative values

allow_sign : whether to accept signed values. If False, it implies that allow_negative is False also.

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note M must be an integer and X must be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.

Returns

whether it is an int

Examples

parser = TypeParser()
parser.is_int("0")    # True
parser.is_int("-1")   # True
parser.is_int("abc")  # False
parser.is_int("")     # False
def is_float( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> bool:
365	def is_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
366		"""
367			Check if a string represents a float (or equivalently, a Decimal)
368
369			This function will also return True if the string represents an int.
370
371			Alias: `is_decimal()`
372
373			Parameters
374			----------
375			`value`
376			: string to be checked
377
378			`allow_scientific`
379			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
380
381			`allow_inf`
382			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
383
384			`allow_nan`
385			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpeted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
386
387			Returns
388			-------
389			whether it is a float or Decimal
390
391			Examples
392			--------
393			```python
394			parser = TypeParser()
395			parser.is_float("1.")       # True
396			parser.is_float("12.3e-2")  # True
397			parser.is_float("abc")      # False
398			parser.is_float("")         # False
399			```
400		"""
401		if self.trim:
402			value = value.strip()
403
404		if len(value) > 0 and value[0] in self._sign_chars:
405			value = value[1:]
406
407		if self.float_case_sensitive:
408			special_value = value
409		else:
410			special_value = value.lower()
411		if allow_inf and special_value in self.inf_values:
412			return True
413		if allow_nan and special_value in self.nan_values:
414			return True
415
416		if len(value) == 0:
417			return False
418
419		if allow_scientific:
420			value, exp = _decompose_string_pair(value, self._scientific_char, self.float_case_sensitive)
421			if exp is not None:
422				return self.is_float(value, allow_scientific=False, allow_inf=False, allow_nan=False) and self.is_int(exp, allow_sign=True, allow_negative=True, allow_scientific=False)
423
424		value, frac = _decompose_string_pair(value, self._float_separator, self.float_case_sensitive)
425		if frac is not None:
426			if value == "" and frac == "":
427				return False
428			return (
429				self.is_int(value, allow_sign=True, allow_negative=False, allow_scientific=False) or value == ""
430			) and (
431				self.is_int(frac, allow_sign=False, allow_negative=False, allow_scientific=False) or frac == ""
432			)
433
434		return self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=False)

Check if a string represents a float (or equivalently, a Decimal)

This function will also return True if the string represents an int.

Alias: is_decimal()

Parameters

value : string to be checked

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

allow_inf : whether to accept positive and negative infinity values. If True, strings that match the values in self.inf_values (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

allow_nan : whether to accept NaN (not a number) representations. If True, strings that match the values in self.nan_values (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

Returns

whether it is a float or Decimal

Examples

parser = TypeParser()
parser.is_float("1.")       # True
parser.is_float("12.3e-2")  # True
parser.is_float("abc")      # False
parser.is_float("")         # False
def is_decimal( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> bool:
437	def is_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
438		"""
439			Alias of `is_float()`
440		"""
441		return self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan)

Alias of is_float()

def parse_none(self, value: str) -> None:
444	def parse_none(self, value: str) -> None:
445		"""
446			Parse a string and return it as the value None if possible
447
448			Only strings that match the values in `self.none_values` will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on `self.none_case_sensitive`, which is False by default.
449
450			Parameters
451			----------
452			`value`
453			: string to be parsed
454
455			Returns
456			-------
457			parsed None value
458
459			Raises
460			------
461			`ValueError` if `value` cannot be parsed
462
463			Examples
464			--------
465			```python
466			parser = TypeParser()
467			parser.parse_bool("")     # None
468			parser.parse_bool("abc")  # raises ValueError
469			```
470		"""
471		if self.is_none(value):
472			return None
473		else:
474			raise ValueError(f"not a none value: {value}")

Parse a string and return it as the value None if possible

Only strings that match the values in self.none_values will be interpreted as None. The default accepted values are [""], i.e. an empty string. The case sensitivity of this matching depends on self.none_case_sensitive, which is False by default.

Parameters

value : string to be parsed

Returns

parsed None value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_none("")     # None
parser.parse_none("abc")  # raises ValueError
def parse_bool(self, value: str) -> bool:
477	def parse_bool(self, value: str) -> bool:
478		"""
479			Parse a string and return it as a bool if possible
480
481			Only strings that match the values in `self.true_values` and `self.false_values` will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on `self.bool_case_sensitive`, which is False by default.
482
483			Parameters
484			----------
485			`value`
486			: string to be parsed
487
488			Returns
489			-------
490			parsed bool value
491
492			Raises
493			------
494			`ValueError` if `value` cannot be parsed
495
496			Examples
497			--------
498			```python
499			parser = TypeParser()
500			parser.parse_bool("true")   # True
501			parser.parse_bool("FALSE")  # False
502			```
503		"""
504		if self.trim:
505			value = value.strip()
506
507		if self.bool_case_sensitive:
508			special_value = value
509		else:
510			special_value = value.lower()
511		if special_value in self.true_values:
512			return True
513		if special_value in self.false_values:
514			return False
515
516		raise ValueError(f"not a boolean: {value}")

Parse a string and return it as a bool if possible

Only strings that match the values in self.true_values and self.false_values will be interpreted as booleans. The default accepted values are ["true"] and ["false"] respectively. The case sensitivity of this matching depends on self.bool_case_sensitive, which is False by default.

Parameters

value : string to be parsed

Returns

parsed bool value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_bool("true")   # True
parser.parse_bool("FALSE")  # False
def parse_int(self, value: str, *, allow_scientific: bool = True) -> int:
519	def parse_int(self, value: str, *, allow_scientific: bool=True) -> int:
520		"""
521			Parse a string and return it as an int if possible
522
523			If the string represents a bool, it will be converted to `1` for True and `0` for False.
524
525			Parameters
526			----------
527			`value`
528			: string to be parsed
529
530			`allow_scientific`
531			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note <var>M</var> *must* be an integer and <var>X</var> *must* be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.
532
533			Returns
534			-------
535			parsed int value
536
537			Raises
538			------
539			`ValueError` if `value` cannot be parsed
540
541			Examples
542			--------
543			```python
544			parser = TypeParser()
545			parser.parse_int("0")    # 0
546			parser.parse_int("-1")   # -1
547			parser.parse_int("2e3")  # 2000
548			```
549		"""
550		if self.trim:
551			value = value.strip()
552
553		if self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=allow_scientific):
554			if allow_scientific:
555				value, exp = _decompose_string_pair(value, self._scientific_char, self.int_case_sensitive)
556				if exp is not None:
557					if value[0] in (self._negative_chars - {self._negative_char}):
558						value = self._negative_char + value[1:]
559					return int(value) * (10 ** int(exp))
560
561			if value[0] in (self._negative_chars - {self._negative_char}):
562				value = self._negative_char + value[1:]
563			return int(value)
564
565		elif self.is_bool(value):
566			return int(self.parse_bool(value))
567		else:
568			raise ValueError(f"not an integer: {value}")

Parse a string and return it as an int if possible

If the string represents a bool, it will be converted to 1 for True and 0 for False.

Parameters

value : string to be parsed

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note M must be an integer and X must be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.

Returns

parsed int value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_int("0")    # 0
parser.parse_int("-1")   # -1
parser.parse_int("2e3")  # 2000
def parse_float( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> float:
616	def parse_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> float:
617		"""
618			Parse a string and return it as a (non-exact) float if possible
619
620			If the string represents a bool, it will be converted to `1.` for True and `0.` for False. If the string represents an int, it will be converted to a float also.
621
622			Behaves analogously to `parse_decimal()`, except that `parse_decimal()` returns an exact Decimal instead.
623
624			Parameters
625			----------
626			`value`
627			: string to be parsed
628
629			`allow_scientific`
630			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
631
632			`allow_inf`
633			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
634
635			`allow_nan`
636			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
637
638			Returns
639			-------
640			parsed float value
641
642			Raises
643			------
644			`ValueError` if `value` cannot be parsed
645
646			Examples
647			--------
648			```python
649			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
650			parser.parse_float("1.")       # 1.
651			parser.parse_float("1.23e2")   # 123.
652			parser.parse_float("1.23e-2")  # 0.0123
653			parser.parse_float("inf")      # math.inf
654			```
655		"""
656		return self._parse_floatlike(value, float, math.inf, math.nan,
657			allow_scientific=allow_scientific,
658			allow_inf=allow_inf,
659			allow_nan=allow_nan,
660		)

Parse a string and return it as a (non-exact) float if possible

If the string represents a bool, it will be converted to 1. for True and 0. for False. If the string represents an int, it will be converted to a float also.

Behaves analogously to parse_decimal(), except that parse_decimal() returns an exact Decimal instead.

Parameters

value : string to be parsed

allow_scientific : whether to accept scientific notation. If True, strings of the form "<var>M</var>e<var>X</var>" will be interpreted as the expression <var>M</var> * (10 ** <var>X</var>), where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

allow_inf : whether to accept positive and negative infinity values. If True, strings that match the values in self.inf_values (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

allow_nan : whether to accept NaN (not a number) representations. If True, strings that match the values in self.nan_values (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

Returns

parsed float value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
parser.parse_float("1.")       # 1.
parser.parse_float("1.23e2")   # 123.
parser.parse_float("1.23e-2")  # 0.0123
parser.parse_float("inf")      # math.inf
def parse_decimal( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> decimal.Decimal:
663	def parse_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> Decimal:
664		"""
665			Parse a string and return it as an exact Decimal if possible
666
667			If the string represents a bool, it will be converted to `Decimal(1)` for True and `Decimal(0)` for False. If the string represents an int, it will be converted to a Decimal also.
668
669			Behaves analogously to `parse_float()`, except that `parse_float()` returns a non-exact float instead.
670
671			Parameters
672			----------
673			`value`
674			: string to be parsed
675
676			`allow_scientific`
677			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
678
679			`allow_inf`
680			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
681
682			`allow_nan`
683			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
684
685			Returns
686			-------
687			parsed Decimal value
688
689			Raises
690			------
691			`ValueError` if `value` cannot be parsed
692
693			Examples
694			--------
695			```python
696			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
697			parser.parse_decimal("1.")       # Decimal(1)
698			parser.parse_decimal("1.23e2")   # Decimal(123)
699			parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
700			parser.parse_decimal("inf")      # Decimal(math.inf)
701			```
702		"""
703		return self._parse_floatlike(value, Decimal, Decimal(math.inf), Decimal(math.nan),
704			allow_scientific=allow_scientific,
705			allow_inf=allow_inf,
706			allow_nan=allow_nan,
707		)

Parse a string and return it as an exact Decimal if possible

If the string represents a bool, it will be converted to Decimal(1) for True and Decimal(0) for False. If the string represents an int, it will be converted to a Decimal also.

Behaves analogously to parse_float(), except that parse_float() returns a non-exact float instead.

Parameters

value : string to be parsed

allow_scientific : whether to accept scientific notation. If True, strings of the form "<var>M</var>e<var>X</var>" will be interpreted as the expression <var>M</var> * (10 ** <var>X</var>), where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

allow_inf : whether to accept positive and negative infinity values. If True, strings that match the values in self.inf_values (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

allow_nan : whether to accept NaN (not a number) representations. If True, strings that match the values in self.nan_values (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

Returns

parsed Decimal value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
parser.parse_decimal("1.")       # Decimal(1)
parser.parse_decimal("1.23e2")   # Decimal(123)
parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
parser.parse_decimal("inf")      # Decimal(math.inf)
def infer( self, value: str) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType]]]:
710	def infer(self, value: str) -> AnyValueType:
711		"""
712			Infer the underlying type of a string
713
714			Also check for inline lists if `self.list_delimiter` is not None.
715
716			Parameters
717			----------
718			`value`
719			: the string for which the type should be inferred
720
721			Returns
722			-------
723			inferred type
724
725			Examples
726			--------
727			```python
728			parser = TypeParser()
729			parser.infer("true")  # bool
730			parser.infer("2.0")   # float
731			parser.infer("abc")   # str
732			```
733		"""
734		if self.is_none(value):
735			return NoneType
736		if self.is_bool(value):
737			return bool
738		if self.is_int(value):
739			return int
740		if self.is_float(value):
741			if self.use_decimal:
742				return Decimal
743			else:
744				return float
745
746		if self.trim:
747			value = value.strip()
748
749		if self.list_delimiter is not None and self.list_delimiter in value:
750			subvalues = value.split(self.list_delimiter)
751			if self.trim:
752				subvalues = [subvalue.strip() for subvalue in subvalues]
753			reduced_type = reduce_types(self.infer(subvalue) for subvalue in subvalues)
754			reduced_type = cast(AnyContainedType, reduced_type)
755			r = list[reduced_type]
756			return r  # type: ignore
757
758		return GenericValue

Infer the underlying type of a string

Also check for inline lists if self.list_delimiter is not None.

Parameters

value : the string for which the type should be inferred

Returns

inferred type

Examples

parser = TypeParser()
parser.infer("true")  # bool
parser.infer("2.0")   # float
parser.infer("abc")   # str
def infer_series( self, values: Iterable[str]) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType]]]:
761	def infer_series(self, values: Iterable[str]) -> AnyValueType:
762		"""
763			Infer the underlying common type of a series of strings
764
765			If the values in the series do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
766
767			Parameters
768			----------
769			`values`
770			: series of strings for which the type should be inferred
771
772			Returns
773			-------
774			inferred type
775
776			Examples
777			--------
778			```python
779			parser = TypeParser()
780			parser.infer_series(["1", "2", "3.4"])       # float
781			parser.infer_series(["true", "false", "2"])  # int
782			parser.infer_series(["1", "2.3", "abc"])     # str
783			```
784		"""
785		return reduce_types(self.infer(value) for value in values)

Infer the underlying common type of a series of strings

If the values in the series do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the series. See parsetypes.reduce_types() for more information.

Parameters

values : series of strings for which the type should be inferred

Returns

inferred type

Examples

parser = TypeParser()
parser.infer_series(["1", "2", "3.4"])       # float
parser.infer_series(["true", "false", "2"])  # int
parser.infer_series(["1", "2.3", "abc"])     # str
def infer_table( self, rows: Iterable[Sequence[str]]) -> list[Type[Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType]]]]:
788	def infer_table(self, rows: Iterable[Sequence[str]]) -> list[AnyValueType]:
789		"""
790			Infer the underlying common type for each column of a table of strings
791
792			For each column, if the values do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
793
794			Note that the inferred types of every individual value must all be able to fit into memory at once.
795
796			Parameters
797			----------
798			`rows`
799			: table of strings for which the types should be inferred, in row-major order
800
801			Returns
802			-------
803			inferred types
804
805			Examples
806			--------
807			```python
808			parser = TypeParser()
809			parser.infer_table([
810				["1",   "true",  "1"],
811				["2",   "false", "2.3"],
812				["3.4", "2",     "abc"],
813			])
814			# [float, int, str]
815			```
816		"""
817		rows_iter = iter(rows)
818		first_row = next(rows_iter, None)
819		if first_row is None:
820			return []
821
822		num_cols = len(first_row)
823		if num_cols == 0:
824			return []
825
826		table = _TypeTable([[self.infer(value)] for value in first_row])
827		for row in rows_iter:
828			table.add_row([self.infer(value) for value in row])
829
830		return [reduce_types(col) for col in table.cols]

Infer the underlying common type for each column of a table of strings

For each column, if the values do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

Note that the inferred types of every individual value must all be able to fit into memory at once.

Parameters

rows : table of strings for which the types should be inferred, in row-major order

Returns

inferred types

Examples

parser = TypeParser()
parser.infer_table([
	["1",   "true",  "1"],
	["2",   "false", "2.3"],
	["3.4", "2",     "abc"],
])
# [float, int, str]
def parse( self, value: str) -> Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType]]:
869	def parse(self, value: str) -> AnyValue:
870		"""
871			Parse a string and convert it to its underlying type
872
873			Parameters
874			----------
875			`value`
876			: the string to be parsed
877
878			Returns
879			-------
880			converted value
881
882			Examples
883			--------
884			```python
885			parser = TypeParser()
886			parser.parse("true")  # True
887			parser.parse("2.0")   # 2.
888			parser.parse("abc")   # "abc"
889			```
890		"""
891		return self._convert(value, self.infer(value))

Parse a string and convert it to its underlying type

Parameters

value : the string to be parsed

Returns

converted value

Examples

parser = TypeParser()
parser.parse("true")  # True
parser.parse("2.0")   # 2.
parser.parse("abc")   # "abc"
def parse_series( self, values: Iterable[str]) -> list[Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType]]]:
894	def parse_series(self, values: Iterable[str]) -> list[AnyValue]:
895		"""
896			Parse a series of strings and convert them to their underlying common type
897
898			If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
899
900			Parameters
901			----------
902			`values`
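903			: series of strings to be parsed. The series is traversed twice (once to infer the common type, then again to convert each value), so pass a re-iterable collection such as a list rather than a one-shot iterator or generator.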
904
905			Returns
906			-------
907			converted values
908
909			Examples
910			--------
911			```python
912			parser = TypeParser()
913			parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
914			parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
915			parser.parse_series(["true", "false", ""])  # [True, False, None]
916			parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
917			```
918		"""
919		inferred = self.infer_series(values)
920		return [self._convert(value, inferred) for value in values]

Parse a series of strings and convert them to their underlying common type

If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See parsetypes.reduce_types() for more information.

Parameters

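values : series of strings to be parsed. The series is traversed twice (once to infer the common type, then again to convert each value), so pass a re-iterable collection such as a list rather than a one-shot iterator or generator.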

Returns

converted values

Examples

parser = TypeParser()
parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
parser.parse_series(["true", "false", ""])  # [True, False, None]
parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
def parse_table( self, rows: Iterable[Sequence[str]]) -> list[list[Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType]]]]:
923	def parse_table(self, rows: Iterable[Sequence[str]]) -> list[list[AnyValue]]:
924		"""
925			Parse a table of strings and convert them to the underlying common type of each column
926
927			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
928
929			Note that the type inference requires that the inferred types of every individual value must all be able to fit into memory at once.
930
931			This is a function that computes the entire table and returns it all at once. The generator `iterate_table()` behaves analogously, except that it computes and yields each row one at a time.
932
933			Parameters
934			----------
935			`rows`
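936			: table of strings to be parsed, in row-major order. The rows are traversed twice (once to infer the column types, then again to convert each row), so pass a re-iterable collection such as a list rather than a one-shot iterator or generator.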
937
938			`iterator`
939			: not accepted by `parse_table()`, whose signature takes only `rows`; the entire table is always computed and returned as a list of lists. To have the rows of the table computed and yielded one at a time, use the generator `iterate_table()` instead. However, note that even then, the type inference requires that the inferred types of every individual value must all be able to fit into memory at once.
940
941			Returns
942			-------
943			converted table of values, in row-major order
944
945			Examples
946			--------
947			```python
948			parser = TypeParser()
949			table = parser.parse_table([
950				["1", "5",   "true",  "1"],
951				["2", "6.7", "false", "2.3"],
952				["3", "8.0", "",      "abc"],
953			])
954			assert table == [
955				[1, 5.,  True,  "1"],
956				[2, 6.7, False, "2.3"],
957				[3, 8.,  None,  "abc"],
958			]
959			```
960		"""
961		return [converted_row for converted_row in self.iterate_table(rows)]

Parse a table of strings and convert them to the underlying common type of each column

For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

Note that the type inference requires that the inferred types of every individual value must all be able to fit into memory at once.

This is a function that computes the entire table and returns it all at once. The generator iterate_table() behaves analogously, except that it computes and yields each row one at a time.

Parameters

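rows : table of strings to be parsed, in row-major order. The rows are traversed twice (once to infer the column types, then again to convert each row), so pass a re-iterable collection such as a list rather than a one-shot iterator or generator.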

iterator : not accepted by parse_table(), whose signature takes only rows; the entire table is always computed and returned as a list of lists. To have the rows of the table computed and yielded one at a time, use the generator iterate_table() instead. However, note that even then, the type inference requires that the inferred types of every individual value must all be able to fit into memory at once.

Returns

converted table of values, in row-major order

Examples

parser = TypeParser()
table = parser.parse_table([
	["1", "5",   "true",  "1"],
	["2", "6.7", "false", "2.3"],
	["3", "8.0", "",      "abc"],
])
assert table == [
	[1, 5.,  True,  "1"],
	[2, 6.7, False, "2.3"],
	[3, 8.,  None,  "abc"],
]
def iterate_table( self, rows: Iterable[Sequence[str]]) -> Iterator[list[Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType]]]]:
964	def iterate_table(self, rows: Iterable[Sequence[str]]) -> Iterator[list[AnyValue]]:
965		"""
966			Parse a table of strings for the underlying common type of each column, then convert and yield each row
967
968			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
969
970			This is a generator that computes and yields each row one at a time. The function `parse_table()` behaves analogously, except that it computes the entire table and returns it as a list of lists. However, note that although this is a generator, the type inference still requires that the inferred types of every individual value must all be able to fit into memory at once.
971
972			Parameters
973			----------
974			`rows`
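975			: table of strings to be parsed, in row-major order. The rows are traversed twice (once to infer the column types, then again to convert each row), so pass a re-iterable collection such as a list rather than a one-shot iterator or generator.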
976
977			Yields
978			-------
979			each row of converted table values
980
981			Examples
982			--------
983			```python
984			parser = TypeParser()
985			table = parser.iterate_table([
986				["1",   "true",  "1"],
987				["2",   "false", "2.3"],
988				["3.4", "2",     "abc"],
989			])
990			assert next(table) == [1.,  1, "1"]
991			assert next(table) == [2.,  0, "2.3"]
992			assert next(table) == [3.4, 2, "abc"]
993			```
994		"""
995		inferred_types = self.infer_table(rows)
996
997		for row in rows:
998			yield [self._convert(value, inferred) for value, inferred in zip(row, inferred_types)]

Parse a table of strings for the underlying common type of each column, then convert and yield each row

For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

This is a generator that computes and yields each row one at a time. The function parse_table() behaves analogously, except that it computes the entire table and returns it as a list of lists. However, note that although this is a generator, the type inference still requires that the inferred types of every individual value must all be able to fit into memory at once.

Parameters

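rows : table of strings to be parsed, in row-major order. The rows are traversed twice (once to infer the column types, then again to convert each row), so pass a re-iterable collection such as a list rather than a one-shot iterator or generator.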

Yields

each row of converted table values

Examples

parser = TypeParser()
table = parser.iterate_table([
	["1",   "true",  "1"],
	["2",   "false", "2.3"],
	["3.4", "2",     "abc"],
])
assert next(table) == [1.,  1, "1"]
assert next(table) == [2.,  0, "2.3"]
assert next(table) == [3.4, 2, "abc"]
Inherited Members
builtins.object
__new__
__repr__
__hash__
__str__
__getattribute__
__setattr__
__delattr__
__lt__
__le__
__eq__
__ne__
__gt__
__ge__
__reduce_ex__
__reduce__
__getstate__
__subclasshook__
__init_subclass__
__format__
__sizeof__
__dir__
def reduce_types( types: Iterable[Type[Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType]]]]) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType]]]:
157def reduce_types(types: Iterable[AnyValueType]) -> AnyValueType:
158	"""
159		Reduce multiple types into a single common type.
160
161		If the input types are not all the same, the resulting type will be the narrowest possible type that will encompass all of the input types.
162
163		This operation is useful in cases such as parsing a CSV file where each column should have a consistent type, but where the individual values in a column could be interpreted variously as ints or floats (or other types).
164
165		Parameters
166		----------
167		`types`
168		: types to be reduced
169
170		Returns
171		-------
172		common reduced type
173
174		Examples
175		--------
176		```python
177		reduce_types([int, float])        # float
178		reduce_types([bool, int])         # int
179		reduce_types([int, float, str])   # str
180		```
181	"""
182	reduced_type: Union[AnyValueType, None] = None
183	for t in types:
184		if reduced_type is None:
185			reduced_type = t
186		elif t != reduced_type:
187			reduced_type = _merge_types(reduced_type, t)
188		if reduced_type == _TerminalValue:
189			return _TerminalValue
190
191	if reduced_type is None:
192		# types is empty
193		return GenericValue
194	else:
195		return reduced_type

Reduce multiple types into a single common type.

If the input types are not all the same, the resulting type will be the narrowest possible type that will encompass all of the input types.

This operation is useful in cases such as parsing a CSV file where each column should have a consistent type, but where the individual values in a column could be interpreted variously as ints or floats (or other types).

Parameters

types : types to be reduced

Returns

common reduced type

Examples

reduce_types([int, float])        # float
reduce_types([bool, int])         # int
reduce_types([int, float, str])   # str