parsetypes

This package provides tools for parsing serialised data to recover their original underlying types.

The TypeParser class provides configurable type inference and parsing. This can be initialised with different settings to, for example:

  • treat inf as either a float or a normal string
  • give exact Decimal values instead of floats
  • detect inline lists
 1"""
 2	This package provides tools for parsing serialised data to recover their original underlying types.
 3
 4	The `TypeParser` class provides configurable type inference and parsing. This can be initialised with different settings to, for example:
 5	- treat `inf` as either a float or a normal string
 6	- give exact Decimal values instead of floats
 7	- detect inline lists
 8"""
 9
10
# Version string of the package.
__version__ = "0.1"

from ._common import AnyScalar, AnyScalarType, AnyValue, AnyValueType, GenericValue, Nullable
from ._parser import TypeParser
from ._reduce_types import reduce_types

# Names exported by `from <package> import *`; the type aliases imported above are not listed here.
__all__ = ('TypeParser', 'reduce_types')
class TypeParser:
 61class TypeParser:
 62	"""
 63		A parser that can be used to infer the underlying types of data serialised as strings, and to convert them into their original underlying types.
 64
 65		Instances of this class can be configured with different settings for the parser and inferrer. See the constructor for more details about the available options.
 66	"""
 67
	def __init__(self,
		*,
		trim: bool=True,
		use_decimal: bool=False,
		list_delimiter: str | None=None,
		none_values: Iterable[str]=("",),
		none_case_sensitive: bool=False,
		true_values: Iterable[str]=("true",),
		false_values: Iterable[str]=("false",),
		bool_case_sensitive: bool=False,
		int_case_sensitive: bool=False,
		inf_values: Iterable[str]=(),
		nan_values: Iterable[str]=(),
		float_case_sensitive: bool=False,
		case_sensitive: bool | None=None,
	):
		"""
			Initialise a new parser

			Parameters
			----------
			`trim`
			: whether leading and trailing whitespace should be stripped from strings

			`use_decimal`
			: whether non-integer numeric values should be inferred as Decimal (exact values) instead of float (non-exact values). This only applies to methods that infer a type (`infer()` and `infer_*()`); methods where the type is given explicitly (`is_float()`, `is_decimal()`, `parse_float()`, `parse_decimal()`) are unaffected.

			`list_delimiter`
			: the delimiter used for identifying lists and for separating list items. If None, no attempt is made to identify lists when inferring types.

			`none_values`
			: strings that represent the value None

			`none_case_sensitive`
			: whether matches against `none_values` are case-sensitive

			`true_values`
			: strings that represent the bool value True

			`false_values`
			: strings that represent the bool value False

			`bool_case_sensitive`
			: whether matches against `true_values` and `false_values` are case-sensitive

			`int_case_sensitive`
			: whether checks for int are case-sensitive. This usually only matters for scientific notation, where mantissa and exponent are separated by `e`.

			`inf_values`
			: strings that represent the float or Decimal value of infinity. Each of them may be prepended with a negative sign to represent negative infinity.

			`nan_values`
			: strings that represent a float or Decimal that is NaN (not a number)

			`float_case_sensitive`
			: whether checks for float are case-sensitive. This applies to matches against `inf_values` and `nan_values`, and to the `e` of scientific notation.

			`case_sensitive`
			: if not None, overrides all of `none_case_sensitive`, `bool_case_sensitive`, `int_case_sensitive` and `float_case_sensitive` with this value, ignoring the individual settings.

			Raises
			------
			`ValueError` if any of the options would lead to ambiguities during parsing
		"""

		if case_sensitive is not None:
			none_case_sensitive = case_sensitive
			int_case_sensitive = case_sensitive
			bool_case_sensitive = case_sensitive
			float_case_sensitive = case_sensitive

		self.trim = trim
		if self.trim:
			# Stored special values must be normalised the same way as the inputs they are compared against
			none_values = (value.strip() for value in none_values)
			true_values = (value.strip() for value in true_values)
			false_values = (value.strip() for value in false_values)
			inf_values = (value.strip() for value in inf_values)
			nan_values = (value.strip() for value in nan_values)

		self.use_decimal = use_decimal
		self.list_delimiter = list_delimiter

		self.none_case_sensitive = none_case_sensitive
		if not self.none_case_sensitive:
			none_values = (value.lower() for value in none_values)
		self.none_values = set(none_values)

		self.bool_case_sensitive = bool_case_sensitive
		if not self.bool_case_sensitive:
			true_values = (value.lower() for value in true_values)
			false_values = (value.lower() for value in false_values)
		self.true_values = set(true_values)
		self.false_values = set(false_values)

		self.int_case_sensitive = int_case_sensitive

		self.float_case_sensitive = float_case_sensitive
		if not self.float_case_sensitive:
			inf_values = (value.lower() for value in inf_values)
			nan_values = (value.lower() for value in nan_values)
		self.inf_values = set(inf_values)
		self.nan_values = set(nan_values)

		# Unconfigurable default values
		self._negative_char = "-"
		self._negative_chars = {self._negative_char, "−"}
		self._sign_chars = self._negative_chars | {"+"}
		self._digit_chars = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}  # Because isdigit("²") == True, but int("²") is invalid
		self._digit_separators = {"_"}
		self._scientific_char = "e"
		self._float_separator = "."
		self._reserved_chars = self._sign_chars | self._digit_chars | self._digit_separators | {self._scientific_char} | {self._float_separator}

		# Check if any special values conflict with each other or with ordinary numeric syntax
		for name, special_values in [
			(_SpecialValue.LIST, [self.list_delimiter] if self.list_delimiter is not None else []),
			(_SpecialValue.NONE, self.none_values),
			(_SpecialValue.TRUE, self.true_values),
			(_SpecialValue.FALSE, self.false_values),
			(_SpecialValue.INF, self.inf_values),
			(_SpecialValue.NAN, self.nan_values),
		]:
			for special_value in special_values:
				if special_value in self._reserved_chars:
					raise ValueError(f"cannot use reserved char as {name.value}: {special_value}")

				if name != _SpecialValue.NONE and self.is_none(special_value):
					raise ValueError(f"cannot use None value as {name.value}: {special_value}")

				if (
					(name == _SpecialValue.TRUE and self.parse_bool(special_value) != True) or
					(name == _SpecialValue.FALSE and self.parse_bool(special_value) != False) or
					(name != _SpecialValue.TRUE and name != _SpecialValue.FALSE and self.is_bool(special_value))
				):
					raise ValueError(f"cannot use bool value as {name.value}: {special_value}")

				if self.is_int(special_value):
					raise ValueError(f"cannot use int value as {name.value}: {special_value}")

				if self.use_decimal:
					if (
						(name == _SpecialValue.INF and self.parse_decimal(special_value) != Decimal(math.inf)) or
						(name == _SpecialValue.NAN and not self.parse_decimal(special_value).is_nan()) or
						(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(special_value))
					):
						raise ValueError(f"cannot use Decimal value as {name.value}: {special_value}")
				else:
					if (
						(name == _SpecialValue.INF and self.parse_float(special_value) != math.inf) or
						# NaN never compares equal to itself, so test it with isnan() rather than by object identity
						(name == _SpecialValue.NAN and not math.isnan(self.parse_float(special_value))) or
						(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(special_value))
					):
						raise ValueError(f"cannot use float value as {name.value}: {special_value}")
222
223
224	def is_none(self, value: str) -> bool:
225		"""
226			Check if a string represents the value None
227
228			Only strings that match the values in `self.none_values` will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on `self.none_case_sensitive`, which is False by default.
229
230			Parameters
231			----------
232			`value`
233			: string to be checked
234
235			Returns
236			-------
237			whether it is None
238
239			Examples
240			--------
241			```python
242			parser = TypeParser()
243			parser.parse_bool("")     # True
244			parser.parse_bool("abc")  # False
245			```
246		"""
247		if self.trim:
248			value = value.strip()
249		if not self.bool_case_sensitive:
250			value = value.lower()
251
252		if value in self.none_values:
253			return True
254		else:
255			return False
256
257
258	def is_bool(self, value: str) -> bool:
259		"""
260			Check if a string represents a bool
261
262			Only strings that match the values in `self.true_values` and `self.false_values` will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on `self.bool_case_sensitive`, which is False by default.
263
264			Parameters
265			----------
266			`value`
267			: string to be checked
268
269			Returns
270			-------
271			whether it is a bool
272
273			Examples
274			--------
275			```python
276			parser = TypeParser()
277			parser.is_bool("true")  # True
278			parser.is_bool("")      # True
279			parser.is_bool("abc")   # False
280			```
281		"""
282		if self.trim:
283			value = value.strip()
284
285		if not self.bool_case_sensitive:
286			value = value.lower()
287		if value in self.true_values:
288			return True
289		if value in self.false_values:
290			return True
291
292		return False
293
294
295	def is_int(self, value: str, *, allow_sign: bool=True, allow_negative: bool=True, allow_scientific: bool=True) -> bool:
296		"""
297			Check if a string represents an int
298
299			Parameters
300			----------
301			`value`
302			: string to be checked
303
304			`allow_negative`
305			: whether to accept negative values
306
307			`allow_sign`
308			: whether to accept signed values. If False, it implies that `allow_negative` is False also.
309
310			`allow_scientific`
311			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note <var>M</var> *must* be an integer and <var>X</var> *must* be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.
312
313			Returns
314			-------
315			whether it is an int
316
317			Examples
318			--------
319			```python
320			parser = TypeParser()
321			parser.is_int("0")    # True
322			parser.is_int("-1")   # True
323			parser.is_int("abc")  # False
324			parser.is_int("")     # False
325			```
326		"""
327		if self.trim:
328			value = value.strip()
329
330		if len(value) == 0:
331			return False
332
333		if allow_scientific:
334			value, exp = _decompose_string_pair(value, self._scientific_char, self.int_case_sensitive)
335			if exp is not None:
336				return self.is_int(
337					value, allow_sign=True, allow_negative=allow_negative, allow_scientific=False
338				) and self.is_int(
339					exp, allow_sign=True, allow_negative=False, allow_scientific=False
340				)
341
342		if value[0] in self._sign_chars:
343			if len(value) == 1:
344				return False
345			if not allow_sign:
346				return False
347			if not allow_negative and value[0] in self._negative_chars:
348				return False
349			value = value[1:]
350		if value[0] in self._digit_separators or value[-1] in self._digit_separators:
351			return False
352
353		prev_separated = False
354		for c in value:
355			if c in self._digit_separators:
356				if prev_separated:
357					return False
358				prev_separated = True
359			else:
360				prev_separated = False
361				if c not in self._digit_chars:
362					return False
363		return True
364
365
366	def is_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
367		"""
368			Check if a string represents a float (or equivalently, a Decimal)
369
370			This function will also return True if the string represents an int.
371
372			Alias: `is_decimal()`
373
374			Parameters
375			----------
376			`value`
377			: string to be checked
378
379			`allow_scientific`
380			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
381
382			`allow_inf`
383			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
384
385			`allow_nan`
386			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpeted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
387
388			Returns
389			-------
390			whether it is a float or Decimal
391
392			Examples
393			--------
394			```python
395			parser = TypeParser()
396			parser.is_float("1.")       # True
397			parser.is_float("12.3e-2")  # True
398			parser.is_float("abc")      # False
399			parser.is_float("")         # False
400			```
401		"""
402		if self.trim:
403			value = value.strip()
404
405		if len(value) > 0 and value[0] in self._sign_chars:
406			value = value[1:]
407
408		if self.float_case_sensitive:
409			special_value = value
410		else:
411			special_value = value.lower()
412		if allow_inf and special_value in self.inf_values:
413			return True
414		if allow_nan and special_value in self.nan_values:
415			return True
416
417		if len(value) == 0:
418			return False
419
420		if allow_scientific:
421			value, exp = _decompose_string_pair(value, self._scientific_char, self.float_case_sensitive)
422			if exp is not None:
423				return self.is_float(value, allow_scientific=False, allow_inf=False, allow_nan=False) and self.is_int(exp, allow_sign=True, allow_negative=True, allow_scientific=False)
424
425		value, frac = _decompose_string_pair(value, self._float_separator, self.float_case_sensitive)
426		if frac is not None:
427			if value == "" and frac == "":
428				return False
429			return (
430				self.is_int(value, allow_sign=True, allow_negative=False, allow_scientific=False) or value == ""
431			) and (
432				self.is_int(frac, allow_sign=False, allow_negative=False, allow_scientific=False) or frac == ""
433			)
434
435		return self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=False)
436
437
438	def is_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
439		"""
440			Alias of `is_float()`
441		"""
442		return self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan)
443
444
445	def parse_none(self, value: str) -> None:
446		"""
447			Parse a string and return it as the value None if possible
448
449			Only strings that match the values in `self.none_values` will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on `self.none_case_sensitive`, which is False by default.
450
451			Parameters
452			----------
453			`value`
454			: string to be parsed
455
456			Returns
457			-------
458			parsed None value
459
460			Raises
461			------
462			`ValueError` if `value` cannot be parsed
463
464			Examples
465			--------
466			```python
467			parser = TypeParser()
468			parser.parse_bool("")     # None
469			parser.parse_bool("abc")  # raises ValueError
470			```
471		"""
472		if self.is_none(value):
473			return None
474		else:
475			raise ValueError(f"not a none value: {value}")
476
477
478	def parse_bool(self, value: str) -> bool:
479		"""
480			Parse a string and return it as a bool if possible
481
482			Only strings that match the values in `self.true_values` and `self.false_values` will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on `self.bool_case_sensitive`, which is False by default.
483
484			Parameters
485			----------
486			`value`
487			: string to be parsed
488
489			Returns
490			-------
491			parsed bool value
492
493			Raises
494			------
495			`ValueError` if `value` cannot be parsed
496
497			Examples
498			--------
499			```python
500			parser = TypeParser()
501			parser.parse_bool("true")   # True
502			parser.parse_bool("FALSE")  # False
503			```
504		"""
505		if self.trim:
506			value = value.strip()
507
508		if self.bool_case_sensitive:
509			special_value = value
510		else:
511			special_value = value.lower()
512		if special_value in self.true_values:
513			return True
514		if special_value in self.false_values:
515			return False
516
517		raise ValueError(f"not a boolean: {value}")
518
519
520	def parse_int(self, value: str, *, allow_scientific: bool=True) -> int:
521		"""
522			Parse a string and return it as an int if possible
523
524			If the string represents a bool, it will be converted to `1` for True and `0` for False.
525
526			Parameters
527			----------
528			`value`
529			: string to be parsed
530
531			`allow_scientific`
532			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note <var>M</var> *must* be an integer and <var>X</var> *must* be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.
533
534			Returns
535			-------
536			parsed int value
537
538			Raises
539			------
540			`ValueError` if `value` cannot be parsed
541
542			Examples
543			--------
544			```python
545			parser = TypeParser()
546			parser.parse_int("0")    # 0
547			parser.parse_int("-1")   # -1
548			parser.parse_int("2e3")  # 2000
549			```
550		"""
551		if self.trim:
552			value = value.strip()
553
554		if self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=allow_scientific):
555			if allow_scientific:
556				value, exp = _decompose_string_pair(value, self._scientific_char, self.int_case_sensitive)
557				if exp is not None:
558					if value[0] in (self._negative_chars - {self._negative_char}):
559						value = self._negative_char + value[1:]
560					return int(value) * (10 ** int(exp))
561
562			if value[0] in (self._negative_chars - {self._negative_char}):
563				value = self._negative_char + value[1:]
564			return int(value)
565
566		elif self.is_bool(value):
567			return int(self.parse_bool(value))
568		else:
569			raise ValueError(f"not an integer: {value}")
570
571
572	def _parse_floatlike(self,
573		value: str,
574		converter: Callable[[Union[str, bool]], _FloatLike],
575		inf_value: _FloatLike,
576		nan_value: _FloatLike,
577		*,
578		allow_scientific: bool=True,
579		allow_inf: bool=True,
580		allow_nan: bool=True
581	) -> _FloatLike:
582		if self.trim:
583			value = value.strip()
584		if self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan):
585			if self.float_case_sensitive:
586				special_value = value
587			else:
588				special_value = value.lower()
589			if allow_inf and special_value in self.inf_values:
590				return inf_value
591			if allow_nan and special_value in self.nan_values:
592				return nan_value
593
594			if len(value) > 0 and value[0] in self._sign_chars:
595				positive_part = value[1:]
596				if self.float_case_sensitive:
597					special_value = positive_part
598				else:
599					special_value = positive_part.lower()
600				if allow_inf and special_value in self.inf_values:
601					if value[0] in self._negative_chars:
602						return -1 * inf_value
603					else:
604						return inf_value
605				if allow_nan and special_value in self.nan_values:
606					return nan_value
607
608				if value[0] in self._negative_chars:
609					value = self._negative_char + positive_part
610			return converter(value)
611		elif self.is_bool(value):
612			return converter(self.parse_bool(value))
613		else:
614			raise ValueError(f"not a {_FloatLike.__name__}: {value}")
615
616
617	def parse_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> float:
618		"""
619			Parse a string and return it as a (non-exact) float if possible
620
621			If the string represents a bool, it will be converted to `1.` for True and `0.` for False. If the string represents an int, it will be converted to a float also.
622
623			Behaves analogously to `parse_decimal()`, except that that returns an exact Decimal instead.
624
625			Parameters
626			----------
627			`value`
628			: string to be parsed
629
630			`allow_scientific`
631			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
632
633			`allow_inf`
634			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
635
636			`allow_nan`
637			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpeted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
638
639			Returns
640			-------
641			parsed float value
642
643			Raises
644			------
645			`ValueError` if `value` cannot be parsed
646
647			Examples
648			--------
649			```python
650			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
651			parser.parse_float("1.")       # 1.
652			parser.parse_float("1.23e2")   # 123.
653			parser.parse_float("1.23e-2")  # 0.0123
654			parser.parse_float("inf")      # math.inf
655			```
656		"""
657		return self._parse_floatlike(value, float, math.inf, math.nan,
658			allow_scientific=allow_scientific,
659			allow_inf=allow_inf,
660			allow_nan=allow_nan,
661		)
662
663
664	def parse_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> Decimal:
665		"""
666			Parse a string and return it as an exact Decimal if possible
667
668			If the string represents a bool, it will be converted to `Decimal(1)` for True and `Decimal(0)` for False. If the string represents an int, it will be converted to a Decimal also.
669
670			Behaves analogously to `parse_float()`, except that that returns a non-exact float instead.
671
672			Parameters
673			----------
674			`value`
675			: string to be parsed
676
677			`allow_scientific`
678			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
679
680			`allow_inf`
681			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
682
683			`allow_nan`
684			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpeted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
685
686			Returns
687			-------
688			parsed Decimal value
689
690			Raises
691			------
692			`ValueError` if `value` cannot be parsed
693
694			Examples
695			--------
696			```python
697			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
698			parser.parse_decimal("1.")       # Decimal(1)
699			parser.parse_decimal("1.23e2")   # Decimal(123)
700			parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
701			parser.parse_decimal("inf")      # Decimal(math.inf)
702			```
703		"""
704		return self._parse_floatlike(value, Decimal, Decimal(math.inf), Decimal(math.nan),
705			allow_scientific=allow_scientific,
706			allow_inf=allow_inf,
707			allow_nan=allow_nan,
708		)
709
710
711	def infer(self, value: str) -> AnyValueType:
712		"""
713			Infer the underlying type of a string
714
715			Also check for inline lists if `self.list_delimiter` is not None.
716
717			Parameters
718			----------
719			`value`
720			: the string for which the type should be inferred
721
722			Returns
723			-------
724			inferred type
725
726			Examples
727			--------
728			```python
729			parser = TypeParser()
730			parser.infer("true")  # bool
731			parser.infer("2.0")   # float
732			parser.infer("abc")   # str
733			```
734		"""
735		if self.is_none(value):
736			return NoneType
737		if self.is_bool(value):
738			return bool
739		if self.is_int(value):
740			return int
741		if self.is_float(value):
742			if self.use_decimal:
743				return Decimal
744			else:
745				return float
746
747		if self.trim:
748			value = value.strip()
749
750		if self.list_delimiter is not None and self.list_delimiter in value:
751			subvalues = value.split(self.list_delimiter)
752			if self.trim:
753				subvalues = [subvalue.strip() for subvalue in subvalues]
754			reduced_type = reduce_types(self.infer(subvalue) for subvalue in subvalues)
755			reduced_type = cast(AnyContainedType, reduced_type)
756			r = list[reduced_type]
757			return r  # type: ignore
758
759		return GenericValue
760
761
762	def infer_series(self, values: Iterable[str]) -> AnyValueType:
763		"""
764			Infer the underlying common type of a series of strings
765
766			If the values in the series do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
767
768			Parameters
769			----------
770			`values`
771			: series of strings for which the type should be inferred
772
773			Returns
774			-------
775			inferred type
776
777			Examples
778			--------
779			```python
780			parser = TypeParser()
781			parser.infer_series(["1", "2", "3.4"])       # float
782			parser.infer_series(["true", "false", "2"])  # int
783			parser.infer_series(["1", "2.3", "abc"])     # str
784			```
785		"""
786		return reduce_types(self.infer(value) for value in values)
787
788
789	def infer_table(self, rows: Iterable[Sequence[str]]) -> list[AnyValueType]:
790		"""
791			Infer the underlying common type for each column of a table of strings
792
793			For each column, if the values do not have the same apparent type, the resulting type will be narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
794
795			Note that the inferred types of every individual value must all be able to fit into memory at once.
796
797			Parameters
798			----------
799			`rows`
800			: table of strings for which the types should be inferred, in row-major order
801
802			Returns
803			-------
804			inferred types
805
806			Examples
807			--------
808			```python
809			parser = TypeParser()
810			parser.infer_table([
811				["1",   "true",  "1"],
812				["2",   "false", "2.3"],
813				["3.4", "2",     "abc"],
814			])
815			# [float, int, str]
816			```
817		"""
818		rows_iter = iter(rows)
819		first_row = next(rows_iter, None)
820		if first_row is None:
821			return []
822
823		num_cols = len(first_row)
824		if num_cols == 0:
825			return []
826
827		table = _TypeTable([[self.infer(value)] for value in first_row])
828		for row in rows_iter:
829			table.add_row([self.infer(value) for value in row])
830
831		return [reduce_types(col) for col in table.cols]
832
833
834	def _convert(self, value: str, t: AnyValueType) -> AnyValue:
835		base, type_args = _decompose_type(t)
836		if base == NoneType:
837			return None
838		elif base == bool:
839			return self.parse_bool(value)
840		elif base == int:
841			return self.parse_int(value)
842		elif base == Decimal:
843			return self.parse_decimal(value)
844		elif base == float:
845			return self.parse_float(value)
846		elif base == str:
847			return value
848		elif base == Nullable:
849			if self.is_none(value):
850				return None
851			else:
852				if type_args is not  None and len(type_args) == 1 and type_args[0] != str:
853					inner_type = type_args[0]
854					return self._convert(value, inner_type)
855				else:
856					return value
857		elif base == list:
858			subvalues = value.split(self.list_delimiter)
859			if self.trim:
860				subvalues = [subvalue.strip() for subvalue in subvalues]
861			if type_args is not None and len(type_args) == 1 and type_args[0] != str:
862				subtype = type_args[0]
863				return cast(AnyContained, [self._convert(subvalue, subtype) for subvalue in subvalues])
864			else:
865				return subvalues
866		else:
867			return value
868
869
870	def parse(self, value: str) -> AnyValue:
871		"""
872			Parse a string and convert it to its underlying type
873
874			Parameters
875			----------
876			`value`
877			: the string to be parsed
878
879			Returns
880			-------
881			converted value
882
883			Examples
884			--------
885			```python
886			parser = TypeParser()
887			parser.parse("true")  # True
888			parser.parse("2.0")   # 2.
889			parser.parse("abc")   # "abc"
890			```
891		"""
892		return self._convert(value, self.infer(value))
893
894
895	def parse_series(self, values: Iterable[str]) -> list[AnyValue]:
896		"""
897			Parse a series of strings and convert them to their underlying common type
898
899			If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
900
901			Parameters
902			----------
903			`values`
904			: series of strings to be parsed
905
906			Returns
907			-------
908			converted values
909
910			Examples
911			--------
912			```python
913			parser = TypeParser()
914			parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
915			parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
916			parser.parse_series(["true", "false", ""])  # [True, False, None]
917			parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
918			```
919		"""
920		inferred = self.infer_series(values)
921		return [self._convert(value, inferred) for value in values]
922
923
924	def parse_table(self, rows: Iterable[Sequence[str]]) -> list[list[AnyValue]]:
925		"""
926			Parse a table of strings and convert them to the underlying common type of each column
927
928			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
929
930			Note that the type inference requires that the inferred types of every individual value must all be able to fit into memory at once.
931
932			This is a function that computes the entire table and returns it all at once. The generator `iterate_table()` behaves analogously, except that it computes and yields each row one at a time.
933
934			Parameters
935			----------
936			`rows`
937			: table of strings to be parsed, in row-major order
938
939			`iterator`
940			: whether the parsed values should be yielded as an iterator. If False, which is the default, the entire table is computed and returned as a list of lists. If True, this function behaves as a generator, and the rows of the table are computed and yielded one at a time. However, note that even when set to True, the type inference requires that inferred type of each individual value must all be able to fit into memory at once.
941
942			Returns
943			-------
944			converted table of values, in row-major order
945
946			Examples
947			--------
948			```python
949			parser = TypeParser()
950			table = parser.parse_table([
951				["1", "5",   "true",  "1"],
952				["2", "6.7", "false", "2.3"],
953				["3", "8.0", "",     "abc"],
954			]):
955			assert table == [
956				[1, 5.,  True,  "1"],
957				[2, 6.7, False, "2.3"],
958				[3, 8.,  None,  "abc"],
959			]
960			```
961		"""
962		return [converted_row for converted_row in self.iterate_table(rows)]
963
964
965	def iterate_table(self, rows: Iterable[Sequence[str]]) -> Iterator[list[AnyValue]]:
966		"""
967			Parse a table of strings for the underlying common type of each column, then convert and yield each row
968
969			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
970
971			This is a generator that computes and yields each row one at a time. The function `parse_table()` behaves analogously, except that it computes the entire table and returns it as a list of lists. However, note that although this is a generator, the type inference still requires that the inferred types of every individual value must all be able to fit into memory at once.
972
973			Parameters
974			----------
975			`rows`
976			: table of strings to be parsed, in row-major order
977
978			Yields
979			-------
980			each row of converted table values
981
982			Examples
983			--------
984			```python
985			parser = TypeParser()
986			table = parser.iterate_table([
987				["1",   "true",  "1"],
988				["2",   "false", "2.3"],
989				["3.4", "2",     "abc"],
990			]):
991			assert next(table) == [1.,  1, "1"]
992			assert next(table) == [2.,  0, "2.3"]
993			assert next(table) == [3.4, 2, "abc"]
994			```
995		"""
996		inferred_types = self.infer_table(rows)
997
998		for row in rows:
999			yield [self._convert(value, inferred) for value, inferred in zip(row, inferred_types)]

A parser that can be used to infer the underlying types of data serialised as strings, and to convert them into their original underlying types.

Instances of this class can be configured with different settings for the parser and inferrer. See the constructor for more details about the available options.

TypeParser( *, trim: bool = True, use_decimal: bool = False, list_delimiter: str | None = None, none_values: Iterable[str] = [''], none_case_sensitive: bool = False, true_values: Iterable[str] = ['true'], false_values: Iterable[str] = ['false'], bool_case_sensitive: bool = False, int_case_sensitive: bool = False, inf_values: Iterable[str] = [], nan_values: Iterable[str] = [], float_case_sensitive: bool = False, case_sensitive: bool | None = None)
	def __init__(self,
		*,
		trim: bool=True,
		use_decimal: bool=False,
		list_delimiter: str | None=None,
		none_values: Iterable[str]=[""],
		none_case_sensitive: bool=False,
		true_values: Iterable[str]=["true"],
		false_values: Iterable[str]=["false"],
		bool_case_sensitive: bool=False,
		int_case_sensitive: bool=False,
		inf_values: Iterable[str]=[],
		nan_values: Iterable[str]=[],
		float_case_sensitive: bool=False,
		case_sensitive: bool | None=None,
	):
		"""
			Initialise a new parser

			Parameters
			----------
			`trim`
			: whether leading and trailing whitespace should be stripped from strings

			`use_decimal`
			: whether non-integer numeric values should be inferred as Decimal (exact values) instead of float (non-exact values). Note that this only applies to methods that attempt to infer type (`infer()` and `infer_*()`), and does not affect methods where the type is explicitly specified (`is_float()`, `is_decimal()`, `parse_float()`, `parse_decimal()`).

			`list_delimiter`
			: the delimiter used for identifying lists and for separating list items. If set to None, the parser will not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead.

			`none_values`
			: list of strings that represent the value None

			`none_case_sensitive`
			: whether matches against `none_values` should be made in a case-sensitive manner

			`true_values`
			: list of strings that represent the bool value True

			`false_values`
			: list of strings that represent the bool value False

			`bool_case_sensitive`
			: whether matches against `true_values` and `false_values` should be made in a case-sensitive manner

			`int_case_sensitive`
			: whether checks for int should be done in a case-sensitive manner. This usually only applies to values given in scientific notation, where the mantissa and exponent are usually separated by `e`.

			`inf_values`
			: list of strings that represent the float or Decimal value of infinity. Each of the strings can be prepended with a negative sign to represent negative infinity also.

			`nan_values`
			: list of strings that represent a float or Decimal that is NaN (not a number)

			`float_case_sensitive`
			: whether checks for float should be done in a case-sensitive manner. This applies to matches against `inf_values` and `nan_values`, as well as to values given in scientific notation, where the mantissa and exponent are usually separated by `e`.

			`case_sensitive`
			: whether all matches should be made in a case-sensitive manner. Sets all of `none_case_sensitive`, `bool_case_sensitive`, `int_case_sensitive`, `float_case_sensitive` to the same value, ignoring any individual settings.

			Raises
			------
			`ValueError` if any of the options would lead to ambiguities during parsing
		"""

		# A global case-sensitivity setting overrides every individual one
		if case_sensitive is not None:
			none_case_sensitive = case_sensitive
			int_case_sensitive = case_sensitive
			bool_case_sensitive = case_sensitive
			float_case_sensitive = case_sensitive

		# The special values are normalised (trimmed, then lower-cased where matching is case-insensitive) in the same way that incoming strings are normalised before being matched against them.
		# The list defaults in the signature are only ever read here, never mutated.
		self.trim = trim
		if self.trim:
			none_values = (value.strip() for value in none_values)
			true_values = (value.strip() for value in true_values)
			false_values = (value.strip() for value in false_values)
			inf_values = (value.strip() for value in inf_values)
			nan_values = (value.strip() for value in nan_values)

		self.use_decimal = use_decimal
		self.list_delimiter = list_delimiter

		self.none_case_sensitive = none_case_sensitive
		if not self.none_case_sensitive:
			none_values = (value.lower() for value in none_values)
		self.none_values = set(none_values)

		self.bool_case_sensitive = bool_case_sensitive
		if not self.bool_case_sensitive:
			true_values = (value.lower() for value in true_values)
			false_values = (value.lower() for value in false_values)
		self.true_values = set(true_values)
		self.false_values = set(false_values)

		self.int_case_sensitive = int_case_sensitive

		self.float_case_sensitive = float_case_sensitive
		if not self.float_case_sensitive:
			inf_values = (value.lower() for value in inf_values)
			nan_values = (value.lower() for value in nan_values)
		self.inf_values = set(inf_values)
		self.nan_values = set(nan_values)

		# Unconfigurable default values
		self._negative_char = "-"
		self._negative_chars = {self._negative_char, "−"}
		self._sign_chars = self._negative_chars | {"+"}
		self._digit_chars = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}  # Because isdigit("²") == True, but int("²") is invalid
		self._digit_separators = {"_"}
		self._scientific_char = "e"
		self._float_separator = "."
		self._reserved_chars = self._sign_chars | self._digit_chars | self._digit_separators | {self._scientific_char} | {self._float_separator}

		# Check if any special values conflict: every special value must be recognised as exactly the kind it was registered for, and as nothing else
		for name, special_values in [
			(_SpecialValue.LIST, [self.list_delimiter] if self.list_delimiter is not None else []),
			(_SpecialValue.NONE, self.none_values),
			(_SpecialValue.TRUE, self.true_values),
			(_SpecialValue.FALSE, self.false_values),
			(_SpecialValue.INF, self.inf_values),
			(_SpecialValue.NAN, self.nan_values),
		]:
			for special_value in special_values:
				if special_value in self._reserved_chars:
					raise ValueError(f"cannot use reserved char as {name.value}: {special_value}")

				if name != _SpecialValue.NONE and self.is_none(special_value):
					raise ValueError(f"cannot use None value as {name.value}: {special_value}")

				if (
					(name == _SpecialValue.TRUE and self.parse_bool(special_value) != True) or
					(name == _SpecialValue.FALSE and self.parse_bool(special_value) != False) or
					(name != _SpecialValue.TRUE and name != _SpecialValue.FALSE and self.is_bool(special_value))
				):
					raise ValueError(f"cannot use bool value as {name.value}: {special_value}")

				if self.is_int(special_value):
					raise ValueError(f"cannot use int value as {name.value}: {special_value}")

				if self.use_decimal:
					if (
						(name == _SpecialValue.INF and self.parse_decimal(special_value) != Decimal(math.inf)) or
						(name == _SpecialValue.NAN and not self.parse_decimal(special_value).is_nan()) or
						(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(special_value))
					):
						raise ValueError(f"cannot use Decimal value as {name.value}: {special_value}")
				else:
					if (
						(name == _SpecialValue.INF and self.parse_float(special_value) != math.inf) or
						# NaN must be detected by value: an identity test against math.nan only succeeds for that one particular object
						(name == _SpecialValue.NAN and not math.isnan(self.parse_float(special_value))) or
						(name != _SpecialValue.INF and name != _SpecialValue.NAN and self.is_float(special_value))
					):
						raise ValueError(f"cannot use float value as {name.value}: {special_value}")

Initialise a new parser

Parameters

trim : whether leading and trailing whitespace should be stripped from strings

use_decimal : whether non-integer numeric values should be inferred as Decimal (exact values) instead of float (non-exact values). Note that this only applies to methods that attempt to infer type (infer() and infer_*()), and does not affect methods where the type is explicitly specified (is_float(), is_decimal(), parse_float(), parse_decimal()).

list_delimiter : the delimiter used for identifying lists and for separating list items. If set to None, the parser will not attempt to identify lists when inferring types, which usually causes the value to be treated as a str instead.

none_values : list of strings that represent the value None

none_case_sensitive : whether matches against none_values should be made in a case-sensitive manner

true_values : list of strings that represent the bool value True

false_values : list of strings that represent the bool value False

bool_case_sensitive : whether matches against true_values and false_values should be made in a case-sensitive manner

int_case_sensitive : whether checks for int should be done in a case-sensitive manner. This usually only applies to values given in scientific notation, where the mantissa and exponent usually are separated by e.

inf_values : list of strings that represent the float or Decimal value of infinity. Each of the strings can be prepended with a negative sign to represent negative infinity also.

nan_values : list of strings that represent a float or Decimal that is NaN (not a number)

float_case_sensitive : whether checks for float should be done in a case-sensitive manner. This applies to matches against inf_values and nan_values, as well as to values given in scientific notation, where the mantissa and exponent are usually separated by e.

case_sensitive : whether all matches should be made in a case-sensitive manner. Sets all of none_case_sensitive, bool_case_sensitive, int_case_sensitive, float_case_sensitive to the same value, ignoring any individual settings.

Raises

ValueError if any of the options would lead to ambiguities during parsing

def is_none(self, value: str) -> bool:
224	def is_none(self, value: str) -> bool:
225		"""
226			Check if a string represents the value None
227
228			Only strings that match the values in `self.none_values` will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on `self.none_case_sensitive`, which is False by default.
229
230			Parameters
231			----------
232			`value`
233			: string to be checked
234
235			Returns
236			-------
237			whether it is None
238
239			Examples
240			--------
241			```python
242			parser = TypeParser()
243			parser.parse_bool("")     # True
244			parser.parse_bool("abc")  # False
245			```
246		"""
247		if self.trim:
248			value = value.strip()
249		if not self.bool_case_sensitive:
250			value = value.lower()
251
252		if value in self.none_values:
253			return True
254		else:
255			return False

Check if a string represents the value None

Only strings that match the values in self.none_values will be interpreted as None. The default accepted values are [""], i.e. an empty string. The case sensitivity of this matching depends on self.none_case_sensitive, which is False by default.

Parameters

value : string to be checked

Returns

whether it is None

Examples

parser = TypeParser()
parser.is_none("")     # True
parser.is_none("abc")  # False
def is_bool(self, value: str) -> bool:
258	def is_bool(self, value: str) -> bool:
259		"""
260			Check if a string represents a bool
261
262			Only strings that match the values in `self.true_values` and `self.false_values` will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on `self.bool_case_sensitive`, which is False by default.
263
264			Parameters
265			----------
266			`value`
267			: string to be checked
268
269			Returns
270			-------
271			whether it is a bool
272
273			Examples
274			--------
275			```python
276			parser = TypeParser()
277			parser.is_bool("true")  # True
278			parser.is_bool("")      # True
279			parser.is_bool("abc")   # False
280			```
281		"""
282		if self.trim:
283			value = value.strip()
284
285		if not self.bool_case_sensitive:
286			value = value.lower()
287		if value in self.true_values:
288			return True
289		if value in self.false_values:
290			return True
291
292		return False

Check if a string represents a bool

Only strings that match the values in self.true_values and self.false_values will be interpreted as booleans. The default accepted values are ["true"] and ["false"] respectively. The case sensitivity of this matching depends on self.bool_case_sensitive, which is False by default.

Parameters

value : string to be checked

Returns

whether it is a bool

Examples

parser = TypeParser()
parser.is_bool("true")  # True
parser.is_bool("")      # False
parser.is_bool("abc")   # False
def is_int( self, value: str, *, allow_sign: bool = True, allow_negative: bool = True, allow_scientific: bool = True) -> bool:
295	def is_int(self, value: str, *, allow_sign: bool=True, allow_negative: bool=True, allow_scientific: bool=True) -> bool:
296		"""
297			Check if a string represents an int
298
299			Parameters
300			----------
301			`value`
302			: string to be checked
303
304			`allow_negative`
305			: whether to accept negative values
306
307			`allow_sign`
308			: whether to accept signed values. If False, it implies that `allow_negative` is False also.
309
310			`allow_scientific`
311			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note <var>M</var> *must* be an integer and <var>X</var> *must* be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.
312
313			Returns
314			-------
315			whether it is an int
316
317			Examples
318			--------
319			```python
320			parser = TypeParser()
321			parser.is_int("0")    # True
322			parser.is_int("-1")   # True
323			parser.is_int("abc")  # False
324			parser.is_int("")     # False
325			```
326		"""
327		if self.trim:
328			value = value.strip()
329
330		if len(value) == 0:
331			return False
332
333		if allow_scientific:
334			value, exp = _decompose_string_pair(value, self._scientific_char, self.int_case_sensitive)
335			if exp is not None:
336				return self.is_int(
337					value, allow_sign=True, allow_negative=allow_negative, allow_scientific=False
338				) and self.is_int(
339					exp, allow_sign=True, allow_negative=False, allow_scientific=False
340				)
341
342		if value[0] in self._sign_chars:
343			if len(value) == 1:
344				return False
345			if not allow_sign:
346				return False
347			if not allow_negative and value[0] in self._negative_chars:
348				return False
349			value = value[1:]
350		if value[0] in self._digit_separators or value[-1] in self._digit_separators:
351			return False
352
353		prev_separated = False
354		for c in value:
355			if c in self._digit_separators:
356				if prev_separated:
357					return False
358				prev_separated = True
359			else:
360				prev_separated = False
361				if c not in self._digit_chars:
362					return False
363		return True

Check if a string represents an int

Parameters

value : string to be checked

allow_negative : whether to accept negative values

allow_sign : whether to accept signed values. If False, it implies that allow_negative is False also.

allow_scientific : whether to accept scientific notation. If True, strings of the form "<var>M</var>e<var>X</var>" will be interpreted as the expression <var>M</var> * (10 ** <var>X</var>), where M is the mantissa/significand and X is the exponent. Note M must be an integer and X must be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.

Returns

whether it is an int

Examples

parser = TypeParser()
parser.is_int("0")    # True
parser.is_int("-1")   # True
parser.is_int("abc")  # False
parser.is_int("")     # False
def is_float( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> bool:
366	def is_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
367		"""
368			Check if a string represents a float (or equivalently, a Decimal)
369
370			This function will also return True if the string represents an int.
371
372			Alias: `is_decimal()`
373
374			Parameters
375			----------
376			`value`
377			: string to be checked
378
379			`allow_scientific`
380			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
381
382			`allow_inf`
383			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
384
385			`allow_nan`
386			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpeted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
387
388			Returns
389			-------
390			whether it is a float or Decimal
391
392			Examples
393			--------
394			```python
395			parser = TypeParser()
396			parser.is_float("1.")       # True
397			parser.is_float("12.3e-2")  # True
398			parser.is_float("abc")      # False
399			parser.is_float("")         # False
400			```
401		"""
402		if self.trim:
403			value = value.strip()
404
405		if len(value) > 0 and value[0] in self._sign_chars:
406			value = value[1:]
407
408		if self.float_case_sensitive:
409			special_value = value
410		else:
411			special_value = value.lower()
412		if allow_inf and special_value in self.inf_values:
413			return True
414		if allow_nan and special_value in self.nan_values:
415			return True
416
417		if len(value) == 0:
418			return False
419
420		if allow_scientific:
421			value, exp = _decompose_string_pair(value, self._scientific_char, self.float_case_sensitive)
422			if exp is not None:
423				return self.is_float(value, allow_scientific=False, allow_inf=False, allow_nan=False) and self.is_int(exp, allow_sign=True, allow_negative=True, allow_scientific=False)
424
425		value, frac = _decompose_string_pair(value, self._float_separator, self.float_case_sensitive)
426		if frac is not None:
427			if value == "" and frac == "":
428				return False
429			return (
430				self.is_int(value, allow_sign=True, allow_negative=False, allow_scientific=False) or value == ""
431			) and (
432				self.is_int(frac, allow_sign=False, allow_negative=False, allow_scientific=False) or frac == ""
433			)
434
435		return self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=False)

Check if a string represents a float (or equivalently, a Decimal)

This function will also return True if the string represents an int.

Alias: is_decimal()

Parameters

value : string to be checked

allow_scientific : whether to accept scientific notation. If True, strings of the form "<var>M</var>e<var>X</var>" will be interpreted as the expression <var>M</var> * (10 ** <var>X</var>), where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

allow_inf : whether to accept positive and negative infinity values. If True, strings that match the values in self.inf_values (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

allow_nan : whether to accept NaN (not a number) representations. If True, strings that match the values in self.nan_values (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

Returns

whether it is a float or Decimal

Examples

parser = TypeParser()
parser.is_float("1.")       # True
parser.is_float("12.3e-2")  # True
parser.is_float("abc")      # False
parser.is_float("")         # False
def is_decimal( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> bool:
438	def is_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> bool:
439		"""
440			Alias of `is_float()`
441		"""
442		return self.is_float(value, allow_scientific=allow_scientific, allow_inf=allow_inf, allow_nan=allow_nan)

Alias of is_float()

def parse_none(self, value: str) -> None:
445	def parse_none(self, value: str) -> None:
446		"""
447			Parse a string and return it as the value None if possible
448
449			Only strings that match the values in `self.none_values` will be interpreted as None. The default accepted values are `[""]`, i.e. an empty string. The case sensitivity of this matching depends on `self.none_case_sensitive`, which is False by default.
450
451			Parameters
452			----------
453			`value`
454			: string to be parsed
455
456			Returns
457			-------
458			parsed None value
459
460			Raises
461			------
462			`ValueError` if `value` cannot be parsed
463
464			Examples
465			--------
466			```python
467			parser = TypeParser()
468			parser.parse_bool("")     # None
469			parser.parse_bool("abc")  # raises ValueError
470			```
471		"""
472		if self.is_none(value):
473			return None
474		else:
475			raise ValueError(f"not a none value: {value}")

Parse a string and return it as the value None if possible

Only strings that match the values in self.none_values will be interpreted as None. The default accepted values are [""], i.e. an empty string. The case sensitivity of this matching depends on self.none_case_sensitive, which is False by default.

Parameters

value : string to be parsed

Returns

parsed None value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_none("")     # None
parser.parse_none("abc")  # raises ValueError
def parse_bool(self, value: str) -> bool:
478	def parse_bool(self, value: str) -> bool:
479		"""
480			Parse a string and return it as a bool if possible
481
482			Only strings that match the values in `self.true_values` and `self.false_values` will be interpreted as booleans. The default accepted values are `["true"]` and `["false"]` respectively. The case sensitivity of this matching depends on `self.bool_case_sensitive`, which is False by default.
483
484			Parameters
485			----------
486			`value`
487			: string to be parsed
488
489			Returns
490			-------
491			parsed bool value
492
493			Raises
494			------
495			`ValueError` if `value` cannot be parsed
496
497			Examples
498			--------
499			```python
500			parser = TypeParser()
501			parser.parse_bool("true")   # True
502			parser.parse_bool("FALSE")  # False
503			```
504		"""
505		if self.trim:
506			value = value.strip()
507
508		if self.bool_case_sensitive:
509			special_value = value
510		else:
511			special_value = value.lower()
512		if special_value in self.true_values:
513			return True
514		if special_value in self.false_values:
515			return False
516
517		raise ValueError(f"not a boolean: {value}")

Parse a string and return it as a bool if possible

Only strings that match the values in self.true_values and self.false_values will be interpreted as booleans. The default accepted values are ["true"] and ["false"] respectively. The case sensitivity of this matching depends on self.bool_case_sensitive, which is False by default.

Parameters

value : string to be parsed

Returns

parsed bool value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_bool("true")   # True
parser.parse_bool("FALSE")  # False
def parse_int(self, value: str, *, allow_scientific: bool = True) -> int:
520	def parse_int(self, value: str, *, allow_scientific: bool=True) -> int:
521		"""
522			Parse a string and return it as an int if possible
523
524			If the string represents a bool, it will be converted to `1` for True and `0` for False.
525
526			Parameters
527			----------
528			`value`
529			: string to be parsed
530
531			`allow_scientific`
532			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note <var>M</var> *must* be an integer and <var>X</var> *must* be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.
533
534			Returns
535			-------
536			parsed int value
537
538			Raises
539			------
540			`ValueError` if `value` cannot be parsed
541
542			Examples
543			--------
544			```python
545			parser = TypeParser()
546			parser.parse_int("0")    # 0
547			parser.parse_int("-1")   # -1
548			parser.parse_int("2e3")  # 2000
549			```
550		"""
551		if self.trim:
552			value = value.strip()
553
554		if self.is_int(value, allow_sign=True, allow_negative=True, allow_scientific=allow_scientific):
555			if allow_scientific:
556				value, exp = _decompose_string_pair(value, self._scientific_char, self.int_case_sensitive)
557				if exp is not None:
558					if value[0] in (self._negative_chars - {self._negative_char}):
559						value = self._negative_char + value[1:]
560					return int(value) * (10 ** int(exp))
561
562			if value[0] in (self._negative_chars - {self._negative_char}):
563				value = self._negative_char + value[1:]
564			return int(value)
565
566		elif self.is_bool(value):
567			return int(self.parse_bool(value))
568		else:
569			raise ValueError(f"not an integer: {value}")

Parse a string and return it as an int if possible

If the string represents a bool, it will be converted to 1 for True and 0 for False.

Parameters

value : string to be parsed

allow_scientific : whether to accept scientific notation. If True, strings of the form "<var>M</var>e<var>X</var>" will be interpreted as the expression <var>M</var> * (10 ** <var>X</var>), where M is the mantissa/significand and X is the exponent. Note M must be an integer and X must be a non-negative integer, even in cases where the above expression evaluates mathematically to an integer.

Returns

parsed int value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser()
parser.parse_int("0")    # 0
parser.parse_int("-1")   # -1
parser.parse_int("2e3")  # 2000
def parse_float( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> float:
617	def parse_float(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> float:
618		"""
619			Parse a string and return it as a (non-exact) float if possible
620
621			If the string represents a bool, it will be converted to `1.` for True and `0.` for False. If the string represents an int, it will be converted to a float also.
622
623			Behaves analogously to `parse_decimal()`, except that `parse_decimal()` returns an exact Decimal instead.
624
625			Parameters
626			----------
627			`value`
628			: string to be parsed
629
630			`allow_scientific`
631			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
632
633			`allow_inf`
634			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
635
636			`allow_nan`
637			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
638
639			Returns
640			-------
641			parsed float value
642
643			Raises
644			------
645			`ValueError` if `value` cannot be parsed
646
647			Examples
648			--------
649			```python
650			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
651			parser.parse_float("1.")       # 1.
652			parser.parse_float("1.23e2")   # 123.
653			parser.parse_float("1.23e-2")  # 0.0123
654			parser.parse_float("inf")      # math.inf
655			```
656		"""
657		return self._parse_floatlike(value, float, math.inf, math.nan,
658			allow_scientific=allow_scientific,
659			allow_inf=allow_inf,
660			allow_nan=allow_nan,
661		)

Parse a string and return it as a (non-exact) float if possible

If the string represents a bool, it will be converted to 1. for True and 0. for False. If the string represents an int, it will be converted to a float also.

Behaves analogously to parse_decimal(), except that parse_decimal() returns an exact Decimal instead.

Parameters

value : string to be parsed

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

allow_inf : whether to accept positive and negative infinity values. If True, strings that match the values in self.inf_values (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

allow_nan : whether to accept NaN (not a number) representations. If True, strings that match the values in self.nan_values (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

Returns

parsed float value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
parser.parse_float("1.")       # 1.
parser.parse_float("1.23e2")   # 123.
parser.parse_float("1.23e-2")  # 0.0123
parser.parse_float("inf")      # math.inf
def parse_decimal( self, value: str, *, allow_scientific: bool = True, allow_inf: bool = True, allow_nan: bool = True) -> decimal.Decimal:
664	def parse_decimal(self, value: str, *, allow_scientific: bool=True, allow_inf: bool=True, allow_nan: bool=True) -> Decimal:
665		"""
666			Parse a string and return it as an exact Decimal if possible
667
668			If the string represents a bool, it will be converted to `Decimal(1)` for True and `Decimal(0)` for False. If the string represents an int, it will be converted to a Decimal also.
669
670			Behaves analogously to `parse_float()`, except that `parse_float()` returns a non-exact float instead.
671
672			Parameters
673			----------
674			`value`
675			: string to be parsed
676
677			`allow_scientific`
678			: whether to accept scientific notation. If True, strings of the form `"<var>M</var>e<var>X</var>"` will be interpreted as the expression `<var>M</var> * (10 ** <var>X</var>)`, where <var>M</var> is the mantissa/significand and <var>X</var> is the exponent. Note that <var>X</var> must be an integer, but can be negative.
679
680			`allow_inf`
681			: whether to accept positive and negative infinity values. If True, strings that match the values in `self.inf_values` (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
682
683			`allow_nan`
684			: whether to accept NaN (not a number) representations. If True, strings that match the values in `self.nan_values` (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on `self.float_case_sensitive`, which is False by default.
685
686			Returns
687			-------
688			parsed Decimal value
689
690			Raises
691			------
692			`ValueError` if `value` cannot be parsed
693
694			Examples
695			--------
696			```python
697			parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
698			parser.parse_decimal("1.")       # Decimal(1)
699			parser.parse_decimal("1.23e2")   # Decimal(123)
700			parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
701			parser.parse_decimal("inf")      # Decimal(math.inf)
702			```
703		"""
704		return self._parse_floatlike(value, Decimal, Decimal(math.inf), Decimal(math.nan),
705			allow_scientific=allow_scientific,
706			allow_inf=allow_inf,
707			allow_nan=allow_nan,
708		)

Parse a string and return it as an exact Decimal if possible

If the string represents a bool, it will be converted to Decimal(1) for True and Decimal(0) for False. If the string represents an int, it will be converted to a Decimal also.

Behaves analogously to parse_float(), except that parse_float() returns a non-exact float instead.

Parameters

value : string to be parsed

allow_scientific : whether to accept scientific notation. If True, strings of the form "MeX" will be interpreted as the expression M * (10 ** X), where M is the mantissa/significand and X is the exponent. Note that X must be an integer, but can be negative.

allow_inf : whether to accept positive and negative infinity values. If True, strings that match the values in self.inf_values (empty by default) are interpreted as infinity, or as negative infinity if prepended by a negative sign. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

allow_nan : whether to accept NaN (not a number) representations. If True, strings that match the values in self.nan_values (empty by default) are interpreted as NaN. The case sensitivity of this matching depends on self.float_case_sensitive, which is False by default.

Returns

parsed Decimal value

Raises

ValueError if value cannot be parsed

Examples

parser = TypeParser(inf_values=["inf"], nan_values=["nan"])
parser.parse_decimal("1.")       # Decimal(1)
parser.parse_decimal("1.23e2")   # Decimal(123)
parser.parse_decimal("1.23e-2")  # Decimal(123) / Decimal(10000)
parser.parse_decimal("inf")      # Decimal(math.inf)
def infer( self, value: str) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType], list[str], list[int], list[float], list[decimal.Decimal], list[bool], list[None], list[parsetypes._common.Nullable[str]], list[parsetypes._common.Nullable[int]], list[parsetypes._common.Nullable[float]], list[parsetypes._common.Nullable[decimal.Decimal]], list[parsetypes._common.Nullable[bool]], list[parsetypes._common.Nullable[NoneType]]]]:
711	def infer(self, value: str) -> AnyValueType:
712		"""
713			Infer the underlying type of a string
714
715			Also check for inline lists if `self.list_delimiter` is not None.
716
717			Parameters
718			----------
719			`value`
720			: the string for which the type should be inferred
721
722			Returns
723			-------
724			inferred type
725
726			Examples
727			--------
728			```python
729			parser = TypeParser()
730			parser.infer("true")  # bool
731			parser.infer("2.0")   # float
732			parser.infer("abc")   # str
733			```
734		"""
735		if self.is_none(value):
736			return NoneType
737		if self.is_bool(value):
738			return bool
739		if self.is_int(value):
740			return int
741		if self.is_float(value):
742			if self.use_decimal:
743				return Decimal
744			else:
745				return float
746
747		if self.trim:
748			value = value.strip()
749
750		if self.list_delimiter is not None and self.list_delimiter in value:
751			subvalues = value.split(self.list_delimiter)
752			if self.trim:
753				subvalues = [subvalue.strip() for subvalue in subvalues]
754			reduced_type = reduce_types(self.infer(subvalue) for subvalue in subvalues)
755			reduced_type = cast(AnyContainedType, reduced_type)
756			r = list[reduced_type]
757			return r  # type: ignore
758
759		return GenericValue

Infer the underlying type of a string

Also check for inline lists if self.list_delimiter is not None.

Parameters

value : the string for which the type should be inferred

Returns

inferred type

Examples

parser = TypeParser()
parser.infer("true")  # bool
parser.infer("2.0")   # float
parser.infer("abc")   # str
def infer_series( self, values: Iterable[str]) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType], list[str], list[int], list[float], list[decimal.Decimal], list[bool], list[None], list[parsetypes._common.Nullable[str]], list[parsetypes._common.Nullable[int]], list[parsetypes._common.Nullable[float]], list[parsetypes._common.Nullable[decimal.Decimal]], list[parsetypes._common.Nullable[bool]], list[parsetypes._common.Nullable[NoneType]]]]:
762	def infer_series(self, values: Iterable[str]) -> AnyValueType:
763		"""
764			Infer the underlying common type of a series of strings
765
766			If the values in the series do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
767
768			Parameters
769			----------
770			`values`
771			: series of strings for which the type should be inferred
772
773			Returns
774			-------
775			inferred type
776
777			Examples
778			--------
779			```python
780			parser = TypeParser()
781			parser.infer_series(["1", "2", "3.4"])       # float
782			parser.infer_series(["true", "false", "2"])  # int
783			parser.infer_series(["1", "2.3", "abc"])     # str
784			```
785		"""
786		return reduce_types(self.infer(value) for value in values)

Infer the underlying common type of a series of strings

If the values in the series do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the series. See parsetypes.reduce_types() for more information.

Parameters

values : series of strings for which the type should be inferred

Returns

inferred type

Examples

parser = TypeParser()
parser.infer_series(["1", "2", "3.4"])       # float
parser.infer_series(["true", "false", "2"])  # int
parser.infer_series(["1", "2.3", "abc"])     # str
def infer_table( self, rows: Iterable[Sequence[str]]) -> list[Type[Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType], list[str], list[int], list[float], list[decimal.Decimal], list[bool], list[None], list[parsetypes._common.Nullable[str]], list[parsetypes._common.Nullable[int]], list[parsetypes._common.Nullable[float]], list[parsetypes._common.Nullable[decimal.Decimal]], list[parsetypes._common.Nullable[bool]], list[parsetypes._common.Nullable[NoneType]]]]]:
789	def infer_table(self, rows: Iterable[Sequence[str]]) -> list[AnyValueType]:
790		"""
791			Infer the underlying common type for each column of a table of strings
792
793			For each column, if the values do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
794
795			Note that the inferred types of every individual value must all be able to fit into memory at once.
796
797			Parameters
798			----------
799			`rows`
800			: table of strings for which the types should be inferred, in row-major order
801
802			Returns
803			-------
804			inferred types
805
806			Examples
807			--------
808			```python
809			parser = TypeParser()
810			parser.infer_table([
811				["1",   "true",  "1"],
812				["2",   "false", "2.3"],
813				["3.4", "2",     "abc"],
814			])
815			# [float, int, str]
816			```
817		"""
818		rows_iter = iter(rows)
819		first_row = next(rows_iter, None)
820		if first_row is None:
821			return []
822
823		num_cols = len(first_row)
824		if num_cols == 0:
825			return []
826
827		table = _TypeTable([[self.infer(value)] for value in first_row])
828		for row in rows_iter:
829			table.add_row([self.infer(value) for value in row])
830
831		return [reduce_types(col) for col in table.cols]

Infer the underlying common type for each column of a table of strings

For each column, if the values do not have the same apparent type, the resulting type will be the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

Note that the inferred types of every individual value must all be able to fit into memory at once.

Parameters

rows : table of strings for which the types should be inferred, in row-major order

Returns

inferred types

Examples

parser = TypeParser()
parser.infer_table([
	["1",   "true",  "1"],
	["2",   "false", "2.3"],
	["3.4", "2",     "abc"],
])
# [float, int, str]
def parse( self, value: str) -> Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType], list[str], list[int], list[float], list[decimal.Decimal], list[bool], list[None], list[parsetypes._common.Nullable[str]], list[parsetypes._common.Nullable[int]], list[parsetypes._common.Nullable[float]], list[parsetypes._common.Nullable[decimal.Decimal]], list[parsetypes._common.Nullable[bool]], list[parsetypes._common.Nullable[NoneType]]]:
870	def parse(self, value: str) -> AnyValue:
871		"""
872			Parse a string and convert it to its underlying type
873
874			Parameters
875			----------
876			`value`
877			: the string to be parsed
878
879			Returns
880			-------
881			converted value
882
883			Examples
884			--------
885			```python
886			parser = TypeParser()
887			parser.parse("true")  # True
888			parser.parse("2.0")   # 2.
889			parser.parse("abc")   # "abc"
890			```
891		"""
892		return self._convert(value, self.infer(value))

Parse a string and convert it to its underlying type

Parameters

value : the string to be parsed

Returns

converted value

Examples

parser = TypeParser()
parser.parse("true")  # True
parser.parse("2.0")   # 2.
parser.parse("abc")   # "abc"
def parse_series( self, values: Iterable[str]) -> list[Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType], list[str], list[int], list[float], list[decimal.Decimal], list[bool], list[None], list[parsetypes._common.Nullable[str]], list[parsetypes._common.Nullable[int]], list[parsetypes._common.Nullable[float]], list[parsetypes._common.Nullable[decimal.Decimal]], list[parsetypes._common.Nullable[bool]], list[parsetypes._common.Nullable[NoneType]]]]:
895	def parse_series(self, values: Iterable[str]) -> list[AnyValue]:
896		"""
897			Parse a series of strings and convert them to their underlying common type
898
899			If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See `parsetypes.reduce_types()` for more information.
900
901			Parameters
902			----------
903			`values`
904			: series of strings to be parsed
905
906			Returns
907			-------
908			converted values
909
910			Examples
911			--------
912			```python
913			parser = TypeParser()
914			parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
915			parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
916			parser.parse_series(["true", "false", ""])  # [True, False, None]
917			parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
918			```
919		"""
920		inferred = self.infer_series(values)
921		return [self._convert(value, inferred) for value in values]

Parse a series of strings and convert them to their underlying common type

If the values in the series do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the series. See parsetypes.reduce_types() for more information.

Parameters

values : series of strings to be parsed

Returns

converted values

Examples

parser = TypeParser()
parser.parse_series(["1", "2", "3"])        # [1, 2, 3]
parser.parse_series(["5", "6.7", "8."])     # [5., 6.7, 8.]
parser.parse_series(["true", "false", ""])  # [True, False, None]
parser.parse_series(["1", "2.3", "abc"])    # ["1", "2.3", "abc"]
def parse_table( self, rows: Iterable[Sequence[str]]) -> list[list[Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType], list[str], list[int], list[float], list[decimal.Decimal], list[bool], list[None], list[parsetypes._common.Nullable[str]], list[parsetypes._common.Nullable[int]], list[parsetypes._common.Nullable[float]], list[parsetypes._common.Nullable[decimal.Decimal]], list[parsetypes._common.Nullable[bool]], list[parsetypes._common.Nullable[NoneType]]]]]:
924	def parse_table(self, rows: Iterable[Sequence[str]]) -> list[list[AnyValue]]:
925		"""
926			Parse a table of strings and convert them to the underlying common type of each column
927
928			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
929
930			Note that the type inference requires that the inferred types of every individual value must all be able to fit into memory at once.
931
932			This is a function that computes the entire table and returns it all at once. The generator `iterate_table()` behaves analogously, except that it computes and yields each row one at a time.
933
934			Parameters
935			----------
936			`rows`
937			: table of strings to be parsed, in row-major order
938
939			Note that this function does not take an `iterator` argument: the entire table is always computed and returned as a list of lists.
940			To have the rows computed and yielded one at a time, use the generator `iterate_table()` instead. However, note that even then, the type inference requires that the inferred types of every individual value must all be able to fit into memory at once.
941
942			Returns
943			-------
944			converted table of values, in row-major order
945
946			Examples
947			--------
948			```python
949			parser = TypeParser()
950			table = parser.parse_table([
951				["1", "5",   "true",  "1"],
952				["2", "6.7", "false", "2.3"],
953				["3", "8.0", "",     "abc"],
954			])
955			assert table == [
956				[1, 5.,  True,  "1"],
957				[2, 6.7, False, "2.3"],
958				[3, 8.,  None,  "abc"],
959			]
960			```
961		"""
962		return [converted_row for converted_row in self.iterate_table(rows)]

Parse a table of strings and convert them to the underlying common type of each column

For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

Note that the type inference requires that the inferred types of every individual value must all be able to fit into memory at once.

This is a function that computes the entire table and returns it all at once. The generator iterate_table() behaves analogously, except that it computes and yields each row one at a time.

Parameters

rows : table of strings to be parsed, in row-major order

Note: parse_table() does not take an iterator argument; the entire table is always computed and returned as a list of lists. To have the rows computed and yielded one at a time, use the generator iterate_table() instead. However, note that even then, the type inference requires that the inferred types of every individual value must all be able to fit into memory at once.

Returns

converted table of values, in row-major order

Examples

parser = TypeParser()
table = parser.parse_table([
	["1", "5",   "true",  "1"],
	["2", "6.7", "false", "2.3"],
	["3", "8.0", "",     "abc"],
])
assert table == [
	[1, 5.,  True,  "1"],
	[2, 6.7, False, "2.3"],
	[3, 8.,  None,  "abc"],
]
def iterate_table( self, rows: Iterable[Sequence[str]]) -> Iterator[list[Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType], list[str], list[int], list[float], list[decimal.Decimal], list[bool], list[None], list[parsetypes._common.Nullable[str]], list[parsetypes._common.Nullable[int]], list[parsetypes._common.Nullable[float]], list[parsetypes._common.Nullable[decimal.Decimal]], list[parsetypes._common.Nullable[bool]], list[parsetypes._common.Nullable[NoneType]]]]]:
965	def iterate_table(self, rows: Iterable[Sequence[str]]) -> Iterator[list[AnyValue]]:
966		"""
967			Parse a table of strings for the underlying common type of each column, then convert and yield each row
968
969			For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See `parsetypes.reduce_types()` for more information.
970
971			This is a generator that computes and yields each row one at a time. The function `parse_table()` behaves analogously, except that it computes the entire table and returns it as a list of lists. However, note that although this is a generator, the type inference still requires that the inferred types of every individual value must all be able to fit into memory at once.
972
973			Parameters
974			----------
975			`rows`
976			: table of strings to be parsed, in row-major order
977
978			Yields
979			-------
980			each row of converted table values
981
982			Examples
983			--------
984			```python
985			parser = TypeParser()
986			table = parser.iterate_table([
987				["1",   "true",  "1"],
988				["2",   "false", "2.3"],
989				["3.4", "2",     "abc"],
990			])
991			assert next(table) == [1.,  1, "1"]
992			assert next(table) == [2.,  0, "2.3"]
993			assert next(table) == [3.4, 2, "abc"]
994			```
995		"""
996		inferred_types = self.infer_table(rows)
997
998		for row in rows:
999			yield [self._convert(value, inferred) for value, inferred in zip(row, inferred_types)]

Parse a table of strings for the underlying common type of each column, then convert and yield each row

For each column, if the values do not have the same apparent type, the common type is taken as the narrowest possible type that will encompass all values in the column. See parsetypes.reduce_types() for more information.

This is a generator that computes and yields each row one at a time. The function parse_table() behaves analogously, except that it computes the entire table and returns it as a list of lists. However, note that although this is a generator, the type inference still requires that the inferred types of every individual value must all be able to fit into memory at once.

Parameters

rows : table of strings to be parsed, in row-major order

Yields

each row of converted table values

Examples

parser = TypeParser()
table = parser.iterate_table([
	["1",   "true",  "1"],
	["2",   "false", "2.3"],
	["3.4", "2",     "abc"],
])
assert next(table) == [1.,  1, "1"]
assert next(table) == [2.,  0, "2.3"]
assert next(table) == [3.4, 2, "abc"]
Inherited Members
builtins.object
__new__
__repr__
__hash__
__str__
__getattribute__
__setattr__
__delattr__
__lt__
__le__
__eq__
__ne__
__gt__
__ge__
__reduce_ex__
__reduce__
__getstate__
__subclasshook__
__init_subclass__
__format__
__sizeof__
__dir__
def reduce_types( types: Iterable[Type[Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType], list[str], list[int], list[float], list[decimal.Decimal], list[bool], list[None], list[parsetypes._common.Nullable[str]], list[parsetypes._common.Nullable[int]], list[parsetypes._common.Nullable[float]], list[parsetypes._common.Nullable[decimal.Decimal]], list[parsetypes._common.Nullable[bool]], list[parsetypes._common.Nullable[NoneType]]]]]) -> Type[Union[str, int, float, decimal.Decimal, bool, NoneType, parsetypes._common.Nullable[str], parsetypes._common.Nullable[int], parsetypes._common.Nullable[float], parsetypes._common.Nullable[decimal.Decimal], parsetypes._common.Nullable[bool], parsetypes._common.Nullable[NoneType], list[str], list[int], list[float], list[decimal.Decimal], list[bool], list[None], list[parsetypes._common.Nullable[str]], list[parsetypes._common.Nullable[int]], list[parsetypes._common.Nullable[float]], list[parsetypes._common.Nullable[decimal.Decimal]], list[parsetypes._common.Nullable[bool]], list[parsetypes._common.Nullable[NoneType]]]]:
157def reduce_types(types: Iterable[AnyValueType]) -> AnyValueType:
158	"""
159		Reduce multiple types into a single common type.
160
161		If the input types are not all the same, the resulting type will be the narrowest possible type that will encompass all of the input types.
162
163		This operation is useful in cases such as parsing a CSV file where each column should have a consistent type, but where the individual values in a column could be interpreted variously as ints or floats (or other types).
164
165		Parameters
166		----------
167		`types`
168		: types to be reduced
169
170		Returns
171		-------
172		common reduced type
173
174		Examples
175		--------
176		```python
177		reduce_types([int, float])        # float
178		reduce_types([bool, int])         # int
179		reduce_types([int, float, str])   # str
180		```
181	"""
182	reduced_type: AnyValueType | None = None
183	for t in types:
184		if reduced_type is None:
185			reduced_type = t
186		elif t != reduced_type:
187			reduced_type = _merge_types(reduced_type, t)
188		if reduced_type == _TerminalValue:
189			return _TerminalValue
190
191	if reduced_type is None:
192		# types is empty
193		return GenericValue
194	else:
195		return reduced_type

Reduce multiple types into a single common type.

If the input types are not all the same, the resulting type will be the narrowest possible type that will encompass all of the input types.

This operation is useful in cases such as parsing a CSV file where each column should have a consistent type, but where the individual values in a column could be interpreted variously as ints or floats (or other types).

Parameters

types : types to be reduced

Returns

common reduced type

Examples

reduce_types([int, float])        # float
reduce_types([bool, int])         # int
reduce_types([int, float, str])   # str