Skip to content

Helpers



File with helper function implementations

json_save(path, obj)

Save supported files to json format

Parameters:

Name Type Description Default
path

path to save object

required

obj: Dict object

Source code in app/utils.py
def json_save(path, obj):
    """
    Serialize an object into a json file

    Parameters
    ----------
    path : Union[str, Path]
        destination file; opened for writing with utf-8 encoding
    obj : Dict
        object to serialize, handed to ``json.dump`` unchanged

    Returns
    -------
    None
    """
    with open(path, mode='w', encoding='utf-8') as target:
        json.dump(obj, target)

json_load(path, encoding='utf-8')

Load supported json format files

Parameters:

Name Type Description Default
path str

path of the json file

required
encoding str

encoding to use

'utf-8'

Returns:

Name Type Description
obj Object

any object with any json supported format

Source code in app/utils.py
def json_load(path, encoding='utf-8'):
    """
    Read a json file and return the decoded content

    Parameters
    ----------
    path : str
        location of the json file to read
    encoding : str
        text encoding used to open the file

    Returns
    -------
    obj : Object
        whatever ``json.load`` decodes from the file
    """
    with open(path, mode='r', encoding=encoding) as source:
        return json.load(source)

preprocess_remark(remark)

Remove html tags and replace pre-war with prewar

Parameters:

Name Type Description Default
remark str

home remark

required

Returns:

Name Type Description
remark str

remark without html tags and prewar instead of pre-war

Source code in app/utils.py
def preprocess_remark(remark):
    """
    Remove html tags and other noise from a remark and normalize some phrases

    The remark is cleaned with a regular expression (html-like tags, ``&quot;``
    entities, tokens containing ``http:``/``https:``, asterisks and file-number
    like codes), runs of spaces are collapsed, a few space-delimited phrases
    are rewritten ("pre-war" -> "prewar", "isn't" -> "is not",
    "town home" -> "townhome", ...) and a final "." is appended when the
    remark does not already end with ".", "!" or "?".

    Parameters
    ----------
    remark : str
        home remark

    Returns
    -------
    remark : str
        remark without html tags and prewar instead of pre-war.
        ``None`` is returned for ``None`` input; a remark consisting only of
        whitespace is returned unchanged.
    """
    if remark is None:
        return None
    if not remark.strip():
        return remark

    # Pad with spaces so the space-delimited replacements below also match
    # words at the very start or end of the remark.
    remark = " " + remark + " "
    cleaner = re.compile(
        r'<.*?>|[0-9]+&quot;|&quot;|\S*https?:\S*|\S*http?:\S*|\**|File No. [A-Z]+[0-9]+-+[0-9][0-9][0-9]+.|[A-Z]+[0-9]+-+[0-9][0-9][0-9]+.')
    remark = cleaner.sub('', remark)
    remark = re.sub(' +', ' ', remark)

    # Applied in order. Every replacement keeps its surrounding spaces so that
    # neighbouring words are never glued together (the "pre-war" entries used
    # to drop the trailing space, producing e.g. "prewarbuilding").
    replacements = (
        (" pre-war ", " prewar "),
        (" Pre-war ", " prewar "),
        (" isn't ", " is not "),
        (" isn t ", " is not "),
        (" isnt ", " is not "),
        (" aren't ", " are not "),
        (" aren t ", " are not "),
        (" arent ", " are not "),
        (" town home", " townhome"),
        (" Town home", " Townhome"),
        (" TOWN HOME", " TOWNHOME"),
    )
    for old, new in replacements:
        remark = remark.replace(old, new)

    remark = remark.strip()
    if not remark:
        return remark
    if not remark.endswith((".", "!", "?")):
        remark += "."

    return remark

replace_none(patterns, home_synonyms)

Replace none in patterns with home synonyms

Parameters:

Name Type Description Default
patterns list

list of patterns

required
home_synonyms list

home synonyms

required

Returns:

Name Type Description
patterns list

list of patterns with home synonyms

Source code in app/utils.py
def replace_none(patterns, home_synonyms):
    """
    Put the home synonyms into every pattern

    For each pattern, the ``["RIGHT_ATTRS"]["LOWER"]["IN"]`` entry of its first
    element is overwritten with ``home_synonyms``. The patterns are modified
    in place and every pattern references the same ``home_synonyms`` object.

    Parameters
    ----------
    patterns : list
        list of patterns
    home_synonyms : list
        home synonyms

    Returns
    -------
    patterns : list
        the same list of patterns, now carrying the home synonyms
    """
    for entry in patterns:
        lower_attrs = entry[0]["RIGHT_ATTRS"]["LOWER"]
        lower_attrs["IN"] = home_synonyms
    return patterns

generate_guid(k=6)

Generate random id

Parameters:

Name Type Description Default
k

length of guid

6

Returns:

Name Type Description
guid string

random id in string format

Source code in app/utils.py
def generate_guid(k=6):
    """
    Build a random identifier

    Characters are drawn with ``random.choices`` from the uppercase ASCII
    letters and the digits.

    Parameters
    ----------
    k: int
        number of characters in the guid

    Returns
    -------
    guid : string
        random id in string format
    """
    alphabet = string.ascii_uppercase + string.digits
    return ''.join(random.choices(alphabet, k=k))

filter_and_sort_list(to_process, by)

Filter and reorder a list based on given ranking

Parameters:

Name Type Description Default
to_process list

elements to filter and sort

required
by list

determines which elements should stay from the original list and in what order

required

Returns:

Name Type Description
out list

filtered and sorted list

inputs = ["A", "B", "C", "D", "E", "F", "G"] vowels = ["E", "O", "A", "U", "I"] filter_and_sort_list(to_process=inputs, by=vowels) ["E", "A"]

Source code in app/utils.py
def filter_and_sort_list(to_process, by):
    """
    Filter and reorder a list based on given ranking

    The comparison is case insensitive. The returned elements are taken from
    ``by`` (in its order and with its casing), keeping only those that also
    occur in ``to_process``.

    Parameters
    ----------
    to_process : list
        elements to filter and sort
    by : list
        determines which elements should stay from the original list and in what order

    Returns
    -------
    out : list
        filtered and sorted list. ``[]`` when ``to_process`` is None;
        ``to_process`` itself when ``by`` is None.

    Examples
    --------
    >>> inputs = ["A", "B", "C", "D", "E", "F", "G"]
    >>> vowels = ["E", "O", "A", "U", "I"]
    >>> filter_and_sort_list(to_process=inputs, by=vowels)
    ['E', 'A']
    """
    if to_process is None:
        return []
    if by is None:
        return to_process
    # Lowercase the candidates once instead of once per element of `by`.
    present = {item.lower() for item in to_process}
    return [elem for elem in by if elem.lower() in present]

generate_item_order(num1, num2)

Generate a 2d ordering of indices where - diagonal elems are most preferred ones (from top to bottom) - then come the combination with unused indices (if one of the list is longer than other) - then all the unused items from left to right from top to bottom

Parameters:

Name Type Description Default
num1 int

length of ordering (rows and cols)

required
num2 int

length of ordering (rows and cols)

required

Returns:

Name Type Description
out list of tuple of int

ordered indices

Source code in app/utils.py
def generate_item_order(num1, num2):
    """
    Generate a 2d ordering of indices where
     - diagonal elems are most preferred ones (from top to bottom)
     - then come the combination with unused indices (if one of the list is longer than other)
     - then all the unused items from left to right from top to bottom

    Parameters
    ----------
    num1, num2 : int
        length of ordering (rows and cols)

    Returns
    -------
    out : list of tuple of int
        ordered ``(row, col)`` indices; empty when either length is not positive
    """
    # No pairs exist if either side is empty. Returning early also avoids
    # calling get_different_elems with an empty short range.
    if num1 <= 0 or num2 <= 0:
        return []

    # get_different_elems expects the longer range first; swap when needed
    # and swap the produced pairs back at the end.
    reverse = num1 < num2
    if reverse:
        long_axis, short_axis = range(num2), range(num1)
    else:
        long_axis, short_axis = range(num1), range(num2)

    order = get_different_elems(long_axis, short_axis)
    preferred = set(order)  # O(1) membership instead of scanning the list
    order += [(i, j) for i in long_axis for j in short_axis if (i, j) not in preferred]
    if reverse:
        return [(j, i) for i, j in order]
    return order

get_different_elems(long, short)

Create a list of pairs from given lists when diagonal elems are first, then if one list is longer than the other the range is "shifted" and a new diagonal is taken, etc. Ex.

get_different_elems(range(6), range(4)) [(0, 0), (1, 1), (2, 2), (3, 3), (4, 0), (5, 1)]

Parameters:

Name Type Description Default
long List of int

items to create new pairs from. For best results put longer one first

required
short List of int

items to create new pairs from. For best results put longer one first

required

Returns:

Name Type Description
out List of Tuple

indices of items

Source code in app/utils.py
def get_different_elems(long, short):
    """
    Create a list of pairs from given lists when diagonal elems are first, then if one
    list is longer than the other the range is "shifted" and a new diagonal is taken, etc.

    In other words, the k-th element of ``long`` is paired with element
    ``k % len(short)`` of ``short``.

    Ex.
    >>> get_different_elems(range(6), range(4))
    [(0, 0), (1, 1), (2, 2), (3, 3), (4, 0), (5, 1)]

    Parameters
    ----------
    long, short : List of int
        items to create new pairs from. For best results put longer one first

    Returns
    -------
    out : List of Tuple
        indices of items; empty when either input is empty
    """
    width = len(short)
    if width == 0:
        # Nothing to pair with. The recursive version never terminated here.
        return []
    # Iterative wrap-around; no recursion depth tied to len(long) / len(short).
    return [(item, short[pos % width]) for pos, item in enumerate(long)]

connect_lists_by_order(arr1, arr2)

Create list of pairs from 2 lists, where the diagonal elems are first, then come unused

Parameters:

Name Type Description Default
arr1 list

arrays to connect

required
arr2 list

arrays to connect

required

Returns:

Name Type Description
out List[Tuple]

connected lists

Source code in app/utils.py
def connect_lists_by_order(arr1, arr2):
    """
    Pair up the elements of two lists, diagonal pairs first and unused ones after

    The pairing order is the index order produced by ``generate_item_order``
    for the lengths of the two lists.

    Parameters
    ----------
    arr1, arr2 : list
        arrays to connect

    Returns
    -------
    out : List[Tuple]
        ``(arr1[i], arr2[j])`` pairs in the generated order

    """
    pairs = []
    for row, col in generate_item_order(len(arr1), len(arr2)):
        pairs.append((arr1[row], arr2[col]))
    return pairs

remove_used_strings(to_filter, by)

Remove all elements from one list that are substrings of any element of the second list

Parameters:

Name Type Description Default
to_filter list of str

strings to filter

required
by list of str

strings to search for substrings in

required

Returns:

Name Type Description
out List[str]

filtered strings

Source code in app/utils.py
def remove_used_strings(to_filter, by):
    """
    Remove all elements from one list that are substrings of any element of the second list

    Parameters
    ----------
    to_filter : list of str
        strings to filter
    by : list of str
        strings to search for substrings in

    Returns
    -------
    out : List[str]
        filtered strings. ``[]`` when ``to_filter`` is None; ``to_filter``
        itself when ``by`` is None.
    """
    if to_filter is None:
        return []
    if by is None:
        return to_filter
    return [
        candidate
        for candidate in to_filter
        if not any(candidate in headline for headline in by)
    ]

lowercase_phrase(phrase, exceptions)

Lowercase the phrase except for the words in exceptions list

Parameters:

Name Type Description Default
phrase str

string to lowercase possibly consisting of several words

required
exceptions List[str]

list of words that shouldn't be lowercased

required

Returns:

Name Type Description
out str

processed string

Source code in app/utils.py
def lowercase_phrase(phrase, exceptions):
    """
    Turn a phrase to lowercase, leaving the words listed in exceptions untouched

    The phrase is split on whitespace and re-joined with single spaces.

    Parameters
    ----------
    phrase : str
        string to lowercase possibly consisting of several words
    exceptions : List[str]
        words that must keep their original casing (exact match)

    Returns
    -------
    out : str
        processed string
    """
    tokens = []
    for token in phrase.split():
        if token in exceptions:
            tokens.append(token)
        else:
            tokens.append(token.lower())
    return " ".join(tokens)

case_insensitive_intersection(list1, list2)

Return intersection of two lists of string preserving the original order and case from the first list. The intersection is case insensitive

Parameters:

Name Type Description Default
list1 List of str

lists to find the intersection of

required
list2 List of str

lists to find the intersection of

required

Returns:

Name Type Description
out List of str

elements that are present in both lists in the order they appear in list1

Source code in app/utils.py
def case_insensitive_intersection(list1, list2):
    """
    Return intersection of two lists of string preserving the original order and case from the
    first list. The intersection is case insensitive

    Parameters
    ----------
    list1, list2 : List of str
        lists to find the intersection of

    Returns
    --------
    out : List of str
        elements that are present in both lists in the order they appear in list1;
        ``[]`` when either list is empty or None
    """
    if not list1 or not list2:
        return []
    # A set gives O(1) lookups instead of scanning list2 for every item of list1.
    lowered = {item.lower() for item in list2}
    return [item for item in list1 if item.lower() in lowered]

del_duplicates_ordered(seq, ignore_case=False)

Remove duplicate elements from the sequence preserving their initial order

Parameters:

Name Type Description Default
seq List

elements to filter

required
ignore_case bool

valid only for list of strings. Shows whether to treat uppercase/lowercase versions of the same string as duplicates

False

Returns:

Name Type Description
out List

deduplicated list

Source code in app/utils.py
def del_duplicates_ordered(seq, ignore_case=False):
    """
    Remove duplicate elements from the sequence preserving their initial order

    The first occurrence of every element is kept. Elements are returned as
    they were passed in (no type coercion), so they must be hashable.

    Parameters
    ----------
    seq : List
        elements to filter
    ignore_case : bool
        valid only for list of strings. Shows whether to treat uppercase/lowercase versions of
        the same string as duplicates

    Returns
    --------
    out : List
        deduplicated list; ``[]`` when ``seq`` is empty or None
    """
    if not seq:
        return []

    seen = set()
    unique_items = []
    for item in seq:
        # With ignore_case the lowercased form is the identity, but the
        # originally cased first occurrence is what gets returned.
        key = item.lower() if ignore_case else item
        if key not in seen:
            seen.add(key)
            unique_items.append(item)
    return unique_items

generate_openapi_json(openapi_version='3.1.0')

Generate and save openapi specification of the application

Parameters:

Name Type Description Default
openapi_version str

openapi version specifier

'3.1.0'

Returns:

Type Description
None
Source code in app/utils.py
def generate_openapi_json(openapi_version="3.1.0"):
    """
    Build the openapi specification of the application and write it to disk

    Command line arguments ``--app_path`` (module to import) and
    ``--server_url`` (default ``0.0.0.0:8000``) are parsed, the ``app``
    attribute of the imported module is read, and the specification returned
    by ``get_openapi`` is stored in ``openapi.json`` through ``json_save``.

    Parameters
    ----------
    openapi_version : str
        openapi version specifier

    Returns
    -------
    None
    """
    cli = ArgumentParser()
    cli.add_argument('--app_path')
    cli.add_argument('--server_url', default='0.0.0.0:8000')
    options = cli.parse_args()
    module_path, url = options.app_path, options.server_url

    # The imported module is expected to expose the application as `app`.
    application = importlib.import_module(module_path).app

    specification = get_openapi(
        title=application.title,
        openapi_version=openapi_version,
        version=application.version,
        description=application.description,
        routes=application.routes,
        servers=[{'url': url}],
    )
    json_save('openapi.json', specification)

make_prompt(system_message, prompt)

Make prompt from two separate parts

Parameters:

Name Type Description Default
system_message List[Dict]

The message to help generate text

required
prompt str

Prompt with which to generate

required

Returns:

Name Type Description
out List[Dict]
Source code in app/utils.py
def make_prompt(system_message: List[Dict], prompt: str) -> List[Dict]:
    """
    Combine the message template and the prompt text into one prompt

    The ``"content"`` of the second entry (index 1) of ``system_message`` is
    overwritten with ``prompt``. The list is modified in place and the very
    same object is returned.

    Parameters
    ----------
    system_message: List[Dict]
        The message to help generate text
    prompt: str
        Prompt with which to generate

    Returns
    -------
    out : List[Dict]
        ``system_message`` with the prompt filled in
    """
    prompt_entry = system_message[1]
    prompt_entry["content"] = prompt
    return system_message

replace_pattern(match)

Replace patterns in a matched pattern with commas and spaces.

Parameters:

Name Type Description Default
match Match

A regular expression match object.

required

Returns:

Name Type Description
out str
Source code in app/utils.py
def replace_pattern(match) -> str:
    """
    Rebuild a matched pair of patterns, separated by a comma and spaces.

    The match must expose exactly four groups: digit, pattern, digit, pattern.
    When both patterns are present and at least one digit is present, the two
    halves are joined with ", " and each digit (if any) is separated from its
    pattern by a space. In every other case the groups are concatenated in
    the fallback layout, with missing groups rendered as empty strings.

    Parameters
    ----------
    match : re.Match
        A regular expression match object.

    Returns
    -------
    out : str
    """
    first_num, first_unit, second_num, second_unit = match.groups()

    both_units = first_unit and second_unit
    if both_units and (first_num or second_num):
        left = f'{first_num} {first_unit}' if first_num else f'{first_unit}'
        right = f'{second_num} {second_unit}' if second_num else f'{second_unit}'
        return f'{left}, {right}'

    return f'{first_num or ""} {first_unit or ""}{second_num or ""} {second_unit or ""}'