pub1c-web/app/brackets.py


from typing import Union, List, Iterator

# Unicode byte-order mark character; text_reader drops it from its output.
BOM = '\ufeff'
# Stream produced by brackets_parser: every item is either a string element
# or another stream of the same kind for a nested {...} group.
ParsingIterator = Iterator[Union[str, 'ParsingIterator']]
# Materialised result of brackets_select: a single string or an arbitrarily
# nested list of strings.
NestedStringList = Union[str, List['NestedStringList']]


def text_reader(filename: str, encoding: str = 'utf-8') -> Iterator[str]:
    """Yield the characters of a text file one at a time, skipping BOMs.

    The file is decoded in fixed-size blocks instead of issuing one
    ``read(1)`` call per character; the sequence of yielded characters is
    the same either way.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding used to decode the file.

    Yields:
        Each decoded character in file order, except characters equal to
        ``BOM``, which are dropped wherever they occur.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the content is not valid for ``encoding``.
    """
    block_size = 8192  # characters requested per read call
    with open(filename, 'rt', encoding=encoding) as f:
        while True:
            block = f.read(block_size)
            if not block:  # an empty string signals end of file
                break
            for char in block:
                if char != BOM:
                    yield char


def brackets_parser(reader: Iterator[str], level: int = 0) \
        -> ParsingIterator:
    """Lazily parse a brace-delimited, comma-separated character stream.

    Characters are pulled from ``reader`` one at a time.  Nested groups
    share the same ``reader``, so items must be consumed in order; a nested
    group that the caller left unread is drained before parsing continues.

    Args:
        reader: Iterator producing the input one character per item.
        level: Nesting depth.  With 0 the first unquoted ``{`` is treated
            as the opening of the document root and swallowed; with any
            other value every unquoted ``{`` starts a nested group.

    Yields:
        A string for every non-empty element and a nested parser for every
        inner ``{...}`` group.  Empty elements are not yielded.  Parsing
        stops at the unquoted ``}`` that closes the current group, or when
        ``reader`` runs out (a pending unterminated element is dropped).
    """
    token = ''
    in_quotes = False
    for char in reader:
        if char == '"':
            # Quotes only toggle the quoting state; they never reach the
            # element text.
            in_quotes = not in_quotes
            continue

        if in_quotes:
            # Inside quotes the structural characters are ordinary text,
            # but line breaks are still discarded.
            if char not in ('\n', '\r'):
                token += char
            continue

        if char == '{':
            if level == 0:
                # First brace of the document: enter the root group.
                level += 1
            else:
                child = brackets_parser(reader, level + 1)
                yield child
                # Exhaust whatever the consumer did not read so that the
                # shared reader is positioned after the child's closing }.
                for _ in child:
                    pass
        elif char == ',':
            if token:
                yield token
                token = ''
        elif char == '}':
            if token:
                yield token
            return
        elif char not in ('\n', '\r'):
            token += char


def brackets_select(parser: ParsingIterator, selector: str = '') \
        -> NestedStringList:
    """Pick an element out of a parsed stream and materialise it.

    Args:
        parser: Stream of string elements and nested streams, as produced
            by ``brackets_parser``.  It is consumed while selecting.
        selector: ``/``-separated path of zero-based indexes, e.g.
            ``'2/0'``.  An empty selector selects the whole stream; a
            single trailing ``/`` is tolerated.

    Returns:
        The selected element: a string as is, or a nested stream converted
        into a (recursively nested) list of strings.

    Raises:
        ValueError: If a path component is not an integer.
        StopIteration: If a requested index does not exist at its level
            (propagated from ``next``; kept for backward compatibility).
    """
    elem = parser
    remaining = selector
    # Walk the path one component at a time instead of re-joining and
    # re-splitting the selector on every recursion step.
    while remaining != '':
        head, _, remaining = remaining.partition('/')
        idx = int(head)
        elem = next(item for pos, item in enumerate(elem) if pos == idx)

    # isinstance also recognises str subclasses, which must stay whole
    # instead of being iterated character by character.
    if isinstance(elem, str):
        return elem
    return [item if isinstance(item, str) else brackets_select(item)
            for item in elem]


def get_infobases(filename: str) -> List[str]:
    """Extract one value per entry from the third element of a bracket file.

    The file is read with ``text_reader``, parsed with ``brackets_parser``
    and the root element at index 2 is materialised.  Its first item is
    skipped, and for every remaining item the value at index 1 is returned.

    NOTE(review): presumably each remaining item describes one infobase and
    index 1 holds its name -- confirm against the file format.

    Args:
        filename: Path of the bracket-formatted file.

    Returns:
        The values at index 1 of the selected entries, in file order.
    """
    root = brackets_parser(text_reader(filename))
    section = brackets_select(root, '2')
    return [entry[1] for entry in section[1:]]
initial commit 2021-05-21 12:19:11 +00:00
			`from typing import Union, List, Iterator`

			`BOM = '\ufeff'`
			`ParsingIterator = Iterator[Union[str, 'ParsingIterator']]`
			`NestedStringList = Union[str, List['NestedStringList']]`


			`def text_reader(filename: str, encoding: str = 'utf-8') -> Iterator[str]:`
			`with open(filename, 'rt', encoding=encoding, buffering=1) as f:`
			`while True:`
			`chunk: str = f.read(1)`
			`if 0 == len(chunk):`
			`break`
			`elif chunk.startswith(BOM):`
			`continue`
			`yield chunk`


			`def brackets_parser(reader: Iterator[str], level: int = 0) \`
			`-> ParsingIterator:`
			`cur_elem: str = ""`
			`is_quoted: bool = False`
			`for chunk in reader:`
			`if '{' == chunk and not is_quoted:`
			`if 0 == level: # skip document root, always look at first {`
			`level += 1`
			`continue`
			`nested = brackets_parser(reader, level + 1)`
			`yield nested`
			`# ensure we are done for nested`
			`for _ in nested:`
			`pass`
			`continue`
			`elif ',' == chunk and not is_quoted:`
			`if 0 < len(cur_elem):`
			`yield cur_elem`
			`cur_elem = ''`
			`continue`
			`elif '}' == chunk and not is_quoted:`
			`if 0 < len(cur_elem):`
			`yield cur_elem`
			`break`
			`elif '"' == chunk:`
			`is_quoted = not is_quoted`
			`continue`
			`elif '\n' == chunk or '\r' == chunk:`
			`continue`
			`cur_elem += chunk`


			`def brackets_select(parser: ParsingIterator, selector: str = '') \`
			`-> NestedStringList:`
			`if '' == selector:`
			`l_selector = []`
			`elem = parser`
			`else:`
			`l_selector = selector.split('/')`
			`cur_sel: str = l_selector.pop(0)`
			`idx: int = int(cur_sel)`
			`_, elem = next(filter(lambda x: x[0] == idx, enumerate(parser)))`

			`if 0 == len(l_selector):`
			`if type(elem) == str:`
			`return elem`
			`else:`
			`return [i if type(i) == str else brackets_select(i) for i in elem]`

			`return brackets_select(elem, '/'.join(l_selector))`


			`def get_infobases(filename: str) -> List[str]:`
			`reader = text_reader(filename)`
			`parser = brackets_parser(reader)`
			`base_list = brackets_select(parser, '2')[1:]`
			`return list(map(lambda x: x[1], base_list))`