# pub1c-web/brackets.py


from typing import Union, List, Iterator

# Unicode byte order mark (U+FEFF); text_reader drops this character.
BOM = '\ufeff'
# Lazy stream as produced by brackets_parser: each item is either a string
# or another stream of the same kind (one per nested bracket group).
ParsingIterator = Iterator[Union[str, 'ParsingIterator']]
# Materialized result as returned by brackets_select: a single string or an
# arbitrarily nested list of strings.
NestedStringList = Union[str, List['NestedStringList']]


def text_reader(filename: str, encoding: str = 'utf-8') -> Iterator[str]:
    """Yield the characters of a text file one by one, dropping BOMs.

    The file is read in blocks instead of issuing one ``read(1)`` call per
    character; callers still receive single characters.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding passed to ``open``.

    Yields:
        Every decoded character of the file in order, except characters
        equal to ``BOM`` (these are skipped wherever they occur).

    Note:
        The file is opened on the first ``next()`` and stays open until the
        generator is exhausted or closed.
    """
    block_size = 8192  # characters requested per read() call
    with open(filename, 'rt', encoding=encoding, buffering=1) as f:
        # iter(callable, sentinel) stops at the empty string that read()
        # returns on end of file.
        for block in iter(lambda: f.read(block_size), ''):
            for char in block:
                if char != BOM:
                    yield char


def brackets_parser(reader: Iterator[str], level: int = 0) \
        -> ParsingIterator:
    """Lazily split a character stream into bracket-delimited elements.

    Args:
        reader: Source of characters. It is shared with every nested
            parser, so all of them advance the same stream.
        level: Nesting depth of this parser. At depth 0 the first unquoted
            '{' is treated as the document root and skipped.

    Yields:
        A string for each non-empty element and a nested parser for each
        unquoted '{' (other than the skipped root). Elements are separated
        by unquoted ',' and the stream ends at the first unquoted '}'.

    Notes:
        * '"' toggles quoting and is never part of an element; while
          quoted, '{', ',' and '}' are ordinary characters.
        * '\\n' and '\\r' are discarded, quoted or not.
        * Empty elements are not yielded.
        * Text still pending when ``reader`` runs out is not yielded.
    """
    pending: List[str] = []  # pieces of the element being collected
    inside_quotes = False

    for char in reader:
        if char == '"':
            inside_quotes = not inside_quotes
            continue
        if char in ('\n', '\r'):
            continue
        if inside_quotes or char not in ('{', ',', '}'):
            pending.append(char)
            continue

        # Only unquoted structural characters reach this point.
        if char == '{':
            if level == 0:
                # Document root: step inside it without yielding a group.
                level = 1
                continue
            child = brackets_parser(reader, level + 1)
            yield child
            # The child shares our reader: exhaust whatever the consumer
            # left unread so we resume right after the group's '}'.
            for _ in child:
                pass
            continue

        # ',' or '}' terminates the element collected so far.
        token = ''.join(pending)
        if token:
            yield token
        if char == '}':
            return
        pending = []


def brackets_select(parser: ParsingIterator, selector: str = '') \
        -> NestedStringList:
    """Pick one element out of a parsed stream and materialize it.

    Args:
        parser: Iterable of strings and nested iterables, such as the
            generator returned by ``brackets_parser``. It is consumed up to
            and including the selected element.
        selector: '/'-separated zero-based indices, one per nesting level
            (for example ``'2/0'``). An empty selector selects ``parser``
            itself.

    Returns:
        The selected element: a string as-is (including ``str``
        subclasses), anything else converted recursively into nested
        lists of strings.

    Raises:
        ValueError: A selector segment is not an integer.
        StopIteration: An index is not present at its level (same
            exception the bare ``next()`` call has always raised here).
    """
    head, _, rest = selector.partition('/')

    if selector:
        idx = int(head)
        # Walk the stream lazily; earlier items are skipped, not stored.
        elem = next(item for pos, item in enumerate(parser) if pos == idx)
    else:
        elem = parser

    if rest:
        return brackets_select(elem, rest)

    if isinstance(elem, str):
        return elem
    # Materialize each nested group before pulling the next item, while the
    # underlying stream is still positioned inside that group.
    return [i if isinstance(i, str) else brackets_select(i) for i in elem]


def get_infobases(filename: str) -> List[str]:
    """Extract one field per entry from the bracket file ``filename``.

    The file is parsed with ``brackets_parser``; the root element at
    index 2 is materialized, its first item is dropped, and item ``[1]``
    of every remaining entry is returned.
    NOTE(review): presumably each remaining entry describes one infobase
    and item 1 is its name -- confirm against the file format.

    Args:
        filename: Path of the file to parse (read with ``text_reader``'s
            default encoding).

    Returns:
        List with item ``[1]`` of each selected entry.
    """
    reader = text_reader(filename)
    try:
        parser = brackets_parser(reader)
        base_list = brackets_select(parser, '2')[1:]
    finally:
        # Parsing stops before end of file, so the reader would otherwise
        # keep the file open until it is garbage collected. Closing the
        # generator exits its `with` block right away.
        close = getattr(reader, 'close', None)
        if close is not None:
            close()
    return [entry[1] for entry in base_list]