How to Use ?

Installation

pip install jparse

or

pip install git+https://github.com/elisong/jparse.git#egg=jparse

Show Cases

Initialization

>>> from jparse import JParser
>>> jp = JParser()

>>> TEST_CASE1 = [{'A1': 1, 'A2': 2, 'A3': 3},
                  {'A1': [4, 5, 6], 'A2': 7, 'A3': 'x'}]

>>> TEST_CASE2 = {'A1': [{'B1': 4, 'B2': 5},
                         {'B1': 6, 'B3': 7}],
                  'A2': {'C1': [8, 9],
                         'C2': [10, 11]},
                  'A3': {'A1': {'B4': 12},
                        {'A4': 10}}

flatten - flatten thoroughly

  • default
>>> print(jp.flatten(TEST_CASE1))
defaultdict(None, {'0_A1': 1,
                   '0_A2': 2,
                   '0_A3': 3,
                   '1_A1_0': 4,
                   '1_A1_1': 5,
                   '1_A1_2': 6,
                   '1_A2': 7,
                   '1_A3': 'x'})

>>> print(jp.flatten(TEST_CASE2))
defaultdict(None, {'A1_0_B1': 4,
                   'A1_0_B2': 5,
                   'A1_1_B1': 6,
                   'A1_1_B3': 7,
                   'A2_C1_0': 8,
                   'A2_C1_1': 9,
                   'A2_C2_0': 10,
                   'A2_C2_1': 11,
                   'A3_A1_B4': 12,
                   'A3_A4': 10})
  • add prefix
>>> print(jp.flatten(TEST_CASE1, prefix='F'))
defaultdict(None, {'F_0_A1': 1,
                   'F_0_A2': 2,
                   'F_0_A3': 3,
                   'F_1_A1_0': 4,
                   'F_1_A1_1': 5,
                   'F_1_A1_2': 6,
                   'F_1_A2': 7,
                   'F_1_A3': 'x'})

>>> print(jp.flatten(TEST_CASE2, prefix='F'))
defaultdict(None, {'F_A1_0_B1': 4,
                   'F_A1_0_B2': 5,
                   'F_A1_1_B1': 6,
                   'F_A1_1_B3': 7,
                   'F_A2_C1_0': 8,
                   'F_A2_C1_1': 9,
                   'F_A2_C2_0': 10,
                   'F_A2_C2_1': 11,
                   'F_A3_A4': 10,
                   'F_A3_A1_B4': 12})

flatten_seq - flatten until encounter MutableMapping object

  • default
>>> print(jp.flatten_seq(TEST_CASE1))
defaultdict(None, {'0': {'A1': 1,
                         'A2': 2,
                         'A3': 3},
                   '1': {'A1': [4, 5, 6],
                         'A2': 7,
                         'A3': 'x'}})

>>> import pytest
>>> with pytest.raises(ValueError):
        jp.flatten_seq(TEST_CASE2)
  • add prefix
>>> print(jp.flatten_seq(TEST_CASE1, prefix='F'))
defaultdict(None, {'F_0': {'A1': 1,
                           'A2': 2,
                           'A3': 3},
                   'F_1': {'A1': [4, 5, 6],
                           'A2': 7,
                           'A3': 'x'}})

flatten_map - flatten until encounter MutableSequence object

  • default
>>> print(jp.flatten_map(TEST_CASE2))
defaultdict(None, {'A1': [{'B1': 4, 'B2': 5},
                          {'B1': 6, 'B3': 7}],
                   'A2_C1': [8, 9],
                   'A2_C2': [10, 11],
                   'A3_A4': 10,
                   'A3_A1_B4': 12})

>>> import pytest
>>> with pytest.raises(ValueError):
        jp.flatten_map(TEST_CASE1)
  • add prefix
>>> print(jp.flatten_map(TEST_CASE2, prefix='F'))
defaultdict(None, {'F_A1': [{'B1': 4, 'B2': 5},
                            {'B1': 6, 'B3': 7}],
                   'F_A2_C1': [8, 9],
                   'F_A2_C2': [10, 11],
                   'F_A3_A4': 10,
                   'F_A3_A1_B4': 12})

filter

  • default, how='select'
>>> print(jp.filter('A', ['A1']))
None

>>> print(jp.filter(TEST_CASE1, ['A1']))
[{'A1': 1}, {'A1': [4, 5, 6]}]

>>> print(jp.filter(TEST_CASE2, ['A1']))
{'A1': [{'B1': 4, 'B2': 5}, {'B1': 6, 'B3': 7}]}
  • set how='drop'
>>> print(jp.filter('A', ['A1'], 'drop'))
None

>>> print(jp.filter(TEST_CASE1, ['A1'], 'drop'))
[{'A2': 2, 'A3': 3}, {'A2': 7, 'A3': 'x'}]

>>> print(jp.filter(TEST_CASE2, ['A1'], 'drop'))
{'A2': {'C1': [8, 9],
        'C2': [10, 11]},
 'A3': {'A4': 10,
        'A1': {'B4': 12}}}

select

  • default
>>> print([s for s in jp.select('A', ['A1'])])
[]

>>> print([s for s in jp.select(TEST_CASE1, ['A1'])])
[1, [4, 5, 6]]

>>> print([s for s in jp.select(TEST_CASE2, ['A1'])])
[[{'B1': 4, 'B2': 5},
  {'B1': 6, 'B3': 7}],
 {'B4': 12}]
  • add has_subkeys condition
>>> print([s for s in jp.select('A', ['A1'], has_subkeys=['B1'])])
[]

>>> print([s for s in jp.select(TEST_CASE1, ['A1'], has_subkeys=['B1'])])
[]

>>> print([s for s in jp.select(TEST_CASE2, ['A1'], has_subkeys=['B1'])])
[[{'B1': 4, 'B2': 5},
  {'B1': 6, 'B3': 7}]]
  • add drop_subkeys condition
>>> print([s for s in jp.select('A', ['A1'], drop_subkeys=['B1'])])
[]

>>> print([s for s in jp.select(TEST_CASE1, ['A1'], drop_subkeys=['B1'])])
[]

>>> print([s for s in jp.select(TEST_CASE2, ['A1'], drop_subkeys=['B1'])])
[[{'B2': 5}, {'B3': 7}],
 {'B4': 12}]
  • set gross=True

(When sel_keys’s value is MutableSequence) Any next-subelement has has_subkeys leads to select the whole if gross=True. Otherwise, select next-subelement which has has_subkeys one by one.

>>> print([s for s in jp.select('A', ['A1'], gross=True)])
[]

>>> print([s for s in jp.select(TEST_CASE1, ['A1'], gross=True)])
[1, [4, 5, 6]]

>>> print([s for s in jp.select(TEST_CASE2, ['A1'], gross=True)])
[[{'B1': 4, 'B2': 5},
  {'B1': 6, 'B3': 7}],
 {'B4': 12}]

update

  • default, gross=False
>>> print(jp.update(TEST_CASE1, ['A1'], 10086))
[{'A1': 10086, 'A2': 2, 'A3': 3},
 {'A1': [10086, 10086, 10086], 'A2': 7, 'A3': 'x'}]

>>> print(jp.update(TEST_CASE2, ['A1'], 10086))
{'A1': [10086, 10086],
 'A2': {'C1': [8, 9], 'C2': [10, 11]},
 'A3': {'A4': 10, 'A1': 10086}}
  • add has_subkeys condition
>>> print(jp.update(TEST_CASE1, ['A1'], 10086, has_subkeys=['B1']))
[{'A1': 1, 'A2': 2, 'A3': 3},
 {'A1': [4, 5, 6], 'A2': 7, 'A3': 'x'}]

>>> print(jp.update(TEST_CASE2, ['A1'], 10086, has_subkeys=['B1']))
{'A1': [10086, 10086],
 'A2': {'C1': [8, 9], 'C2': [10, 11]},
 'A3': {'A4': 10, 'A1': {'B4': 12}}}
  • set gross=True
>>> print(jp.update(TEST_CASE1, ['A1'], 10086, gross=True))
[{'A1': 10086, 'A2': 2, 'A3': 3},
 {'A1': 10086, 'A2': 7, 'A3': 'x'}]

>>> print(jp.update(TEST_CASE2, ['A1'], 10086, gross=True))
{'A1': 10086,
 'A2': {'C1': [8, 9], 'C2': [10, 11]},
 'A3': {'A4': 10, 'A1': 10086}}

sort

  • default, sort_by='key', reverse=False
>>> print(jp.sort(TEST_CASE1))
[OrderedDict([('A1', 1), ('A2', 2), ('A3', 3)]),
 OrderedDict([('A1', [4, 5, 6]), ('A2', 7), ('A3', 'x')])]

>>> print(jp.sort(TEST_CASE2))
OrderedDict([('A1', [OrderedDict([('B1', 4), ('B2', 5)]),
                     OrderedDict([('B1', 6), ('B3', 7)])]),
             ('A2', OrderedDict([('C1', [8, 9]), ('C2', [10, 11])])),
             ('A3', OrderedDict([('A1', OrderedDict([('B4', 12)])),
                                 ('A4', 10)]))])
  • set sort_by='value'
>>> print(jp.sort(TEST_CASE1, sort_by='value'))
[OrderedDict([('A1', 1), ('A2', 2), ('A3', 3)]),
 OrderedDict([('A2', 7), ('A1', [4, 5, 6]), ('A3', 'x')])]

>>> print(jp.sort(TEST_CASE2, sort_by='value'))
OrderedDict([('A3', OrderedDict([('A4', 10),
                                 ('A1', OrderedDict([('B4', 12)]))])),
             ('A2', OrderedDict([('C2', [10, 11]),
                                 ('C1', [8, 9])])),
             ('A1', [OrderedDict([('B1', 4), ('B2', 5)]),
                     OrderedDict([('B1', 6), ('B3', 7)])])])
  • set reverse=True
>>> print(jp.sort(TEST_CASE1, reverse=True))
[OrderedDict([('A3', 3), ('A2', 2), ('A1', 1)]),
 OrderedDict([('A3', 'x'), ('A2', 7), ('A1', [4, 5, 6])])]

>>> print(jp.sort(TEST_CASE2, reverse=True))
OrderedDict([('A3', OrderedDict([('A4', 10),
                                 ('A1', OrderedDict([('B4', 12)]))])),
             ('A2', OrderedDict([('C2', [10, 11]),
                                 ('C1', [8, 9])])),
             ('A1', [OrderedDict([('B2', 5), ('B1', 4)]),
                     OrderedDict([('B3', 7), ('B1', 6)])])])

to_df

  • default, flatten=True
>>> print(jp.to_df(TEST_CASE1))
   0_A1  0_A2  0_A3  1_A1_0  1_A1_1  1_A1_2  1_A2 1_A3
0     1     2     3       4       5       6     7    x

>>> print(jp.to_df(TEST_CASE2))
   A1_0_B1  A1_0_B2  A1_1_B1  A1_1_B3  ...    A2_C2_0  A2_C2_1  A3_A1_B4  A3_A4
0        4        5        6        7  ...         10       11        12     10
  • set flatten=False
>>> print(jp.to_df(TEST_CASE1, flatten=False))
          A1  A2 A3
0          1   2  3
1  [4, 5, 6]   7  x

>>> print(jp.to_df(TEST_CASE2, flatten=False))
                                         A1   A2_C1  ...   A3_A1_B4  A3_A4
0  [{'B1': 4, 'B2': 5}, {'B1': 6, 'B3': 7}]  [8, 9]  ...         12     10