Helpers

File with helper function implementations

`json_save(path, obj)`

Save supported files to json format

Parameters:

Name	Type	Description	Default
`path`	`Union[str, Path]`	path to save object	required
`obj`	`Dict`	object to save	required

Source code in app/utils.py

def json_save(path, obj):
    """
    Serialize an object to a JSON file

    The target is opened in text write mode with UTF-8 encoding, so an
    existing file at ``path`` is overwritten.

    Parameters
    ----------
    path : Union[str, Path]
        path to save object
    obj : Dict
        object to save; handed unchanged to ``json.dump``

    Returns
    -------
    None
    """
    with open(path, mode='w', encoding='utf-8') as out_file:
        json.dump(obj, out_file)

`json_load(path, encoding='utf-8')`

Load supported json format files

Parameters:

Name	Type	Description	Default
`path`	`str`	path of the json file	required
`encoding`	`str`	encoding to use	`'utf-8'`

Returns:

Name	Type	Description
`obj`	`Object`	any object with any json supported format

Source code in app/utils.py

def json_load(path, encoding='utf-8'):
    """
    Read a JSON file and return the decoded content

    Parameters
    ----------
    path : str
        path of the json file
    encoding : str
        text encoding used to open the file

    Returns
    -------
    obj : Object
        whatever ``json.load`` decodes from the file (dict, list, str, number, ...)
    """
    with open(path, mode='r', encoding=encoding) as in_file:
        return json.load(in_file)

`preprocess_remark(remark)`

Remove HTML tags and replace "pre-war" with "prewar"

Parameters:

Name	Type	Description	Default
`remark`	`str`	home remark	required

Returns:

Name	Type	Description
`remark`	`str`	remark without html tags and prewar instead of pre-war

Source code in app/utils.py

def preprocess_remark(remark):
    """
    Clean a home remark: strip markup/noise and normalize a few phrases

    Steps, in order:
      1. remove HTML-like tags, ``&quot;`` entities (optionally preceded by
         digits), whitespace-delimited tokens containing an http(s) URL,
         runs of asterisks and "File No. AB12-345"-style reference codes
      2. collapse repeated spaces
      3. rewrite "pre-war"/"Pre-war" to "prewar", expand "isn't"/"aren't"
         variants and join "town home" into "townhome"
      4. strip surrounding whitespace and make sure the text ends with
         ".", "!" or "?"

    Parameters
    ----------
    remark : str
        home remark

    Returns
    -------
    remark : str
        cleaned remark. ``None`` is returned for ``None`` input, and a
        blank/whitespace-only remark is returned unchanged.
    """
    if remark is None:
        return None
    if not remark.strip():
        return remark

    # Pad with spaces so the space-delimited replacements below also match
    # words sitting at the very start or end of the remark.
    remark = " " + remark + " "
    cleaner = re.compile(
        r'<.*?>|[0-9]+&quot;|&quot;|\S*https?:\S*|\S*http?:\S*|\**|File No. [A-Z]+[0-9]+-+[0-9][0-9][0-9]+.|[A-Z]+[0-9]+-+[0-9][0-9][0-9]+.')
    remark = cleaner.sub('', remark)
    remark = re.sub(' +', ' ', remark)

    # Applied sequentially, in this order. Every replacement keeps the
    # surrounding spaces it matched; the pre-war entries previously dropped
    # the trailing space and glued "prewar" onto the following word.
    replacements = (
        (" pre-war ", " prewar "),
        (" Pre-war ", " prewar "),
        (" isn't ", " is not "),
        (" isn t ", " is not "),
        (" isnt ", " is not "),
        (" aren't ", " are not "),
        (" aren t ", " are not "),
        (" arent ", " are not "),
        (" town home", " townhome"),
        (" Town home", " Townhome"),
        (" TOWN HOME", " TOWNHOME"),
    )
    for old, new in replacements:
        remark = remark.replace(old, new)

    remark = remark.strip()
    if not remark:
        return remark
    if not remark.endswith((".", "!", "?")):
        remark += "."

    return remark

`replace_none(patterns, home_synonyms)`

Replace none in patterns with home synonyms

Parameters:

Name	Type	Description	Default
`patterns`	`list`	list of patterns	required
`home_synonyms`	`list`	home synonyms	required

Returns:

Name	Type	Description
`patterns`	`list`	list of patterns with home synonyms

Source code in app/utils.py

def replace_none(patterns, home_synonyms):
    """
    Fill every pattern with the list of home synonyms

    For each pattern, the value stored under
    ``pattern[0]["RIGHT_ATTRS"]["LOWER"]["IN"]`` is overwritten with
    ``home_synonyms``. The patterns are modified in place and all of them
    end up referencing the same ``home_synonyms`` object.

    Parameters
    ----------
    patterns : list
        list of patterns
    home_synonyms : list
        home synonyms

    Returns
    -------
    patterns : list
        the same list object that was passed in, now carrying home synonyms
    """
    for entry in patterns:
        lower_spec = entry[0]["RIGHT_ATTRS"]["LOWER"]
        lower_spec["IN"] = home_synonyms
    return patterns

`generate_guid(k=6)`

Generate random id

Parameters:

Name	Type	Description	Default
`k`	`int`	length of guid	`6`

Returns:

Name	Type	Description
`guid`	`string`	random id in string format

Source code in app/utils.py

def generate_guid(k=6):
    """
    Build a random identifier from uppercase ASCII letters and digits

    Characters are drawn with replacement via ``random.choices``, so the
    result depends on the state of the global ``random`` generator.

    Parameters
    ----------
    k: int
        length of guid

    Returns
    -------
    guid : string
        random id in string format
    """
    alphabet = string.ascii_uppercase + string.digits
    picked = random.choices(alphabet, k=k)
    return ''.join(picked)

`filter_and_sort_list(to_process, by)`

Filter and reorder a list based on given ranking

Parameters:

Name	Type	Description	Default
`to_process`	`list`	elements to filter and sort	required
`by`	`list`	determines which elements should stay from the original list and in what order	required

Returns:

Name	Type	Description
`out`	`list`	filtered and sorted list

Example: with `inputs = ["A", "B", "C", "D", "E", "F", "G"]` and `vowels = ["E", "O", "A", "U", "I"]`, the call `filter_and_sort_list(to_process=inputs, by=vowels)` returns `["E", "A"]`.

Source code in app/utils.py

def filter_and_sort_list(to_process, by):
    """
    Filter and reorder a list based on given ranking

    The comparison is case insensitive. The returned elements are taken
    from ``by`` (in the order and letter case they have there), keeping only
    those that also occur in ``to_process``.

    Parameters
    ----------
    to_process : list
        elements to filter and sort
    by : list
        determines which elements should stay from the original list and in what order

    Returns
    -------
    out : list
        filtered and sorted list. An empty list when ``to_process`` is None;
        ``to_process`` itself (unchanged) when ``by`` is None.

    Examples
    --------
    >>> inputs = ["A", "B", "C", "D", "E", "F", "G"]
    >>> vowels = ["E", "O", "A", "U", "I"]
    >>> filter_and_sort_list(to_process=inputs, by=vowels)
    ['E', 'A']
    """
    if to_process is None:
        return []
    if by is None:
        return to_process
    # Lowercase the candidates once instead of re-scanning them for every
    # element of ``by``.
    available = {item.lower() for item in to_process}
    return [elem for elem in by if elem.lower() in available]

`generate_item_order(num1, num2)`

Generate a 2D ordering of index pairs where: (1) the diagonal elements are the most preferred ones (from top to bottom); (2) then come the combinations with unused indices (if one of the lists is longer than the other); (3) then all the remaining unused pairs, from left to right and from top to bottom.

Parameters:

Name	Type	Description	Default
`num1`	`int`	length of ordering (rows and cols)	required
`num2`	`int`	length of ordering (rows and cols)	required

Returns:

Name	Type	Description
`out`	`list of tuple of int`	ordered indices

Source code in app/utils.py

def generate_item_order(num1, num2):
    """
    Generate a 2d ordering of indices where
     - diagonal elems are most preferred ones (from top to bottom)
     - then come the combination with unused indices (if one of the list is longer than other)
     - then all the unused items from left to right from top to bottom

    Every pair ``(i, j)`` with ``0 <= i < num1`` and ``0 <= j < num2`` appears
    exactly once in the result. For example ``generate_item_order(3, 2)``
    gives ``[(0, 0), (1, 1), (2, 0), (0, 1), (1, 0), (2, 1)]``.

    Parameters
    ----------
    num1, num2 : int
        length of ordering (rows and cols)

    Returns
    -------
    out : list of tuple of int
        ordered indices; empty when either length is zero or negative
    """
    # No pairs can be formed from an empty side. Returning early also keeps
    # the helper from being called with an empty "short" range.
    if num1 <= 0 or num2 <= 0:
        return []

    # The helper expects the longer range first; remember whether we swapped
    # so the pairs can be flipped back to (num1-index, num2-index) at the end.
    swapped = num1 < num2
    if swapped:
        longer, shorter = range(num2), range(num1)
    else:
        longer, shorter = range(num1), range(num2)

    order = get_different_elems(longer, shorter)
    # Set lookup instead of scanning the list for every candidate pair.
    already_used = set(order)
    order += [(i, j) for i in longer for j in shorter if (i, j) not in already_used]

    if swapped:
        return [(j, i) for i, j in order]
    return order

`get_different_elems(long, short)`

Create a list of pairs from the given lists where the diagonal elements come first; then, if one list is longer than the other, the range is "shifted" and a new diagonal is taken, and so on. Example:

get_different_elems(range(6), range(4)) [(0, 0), (1, 1), (2, 2), (3, 3), (4, 0), (5, 1)]

Parameters:

Name	Type	Description	Default
`long`	`List of int`	items to create new pairs from. For best results put longer one first	required
`short`	`List of int`	items to create new pairs from. For best results put longer one first	required

Returns:

Name	Type	Description
`out`	`List of Tuple`	indices of items

Source code in app/utils.py

def get_different_elems(long, short):
    """
    Create a list of pairs from given lists when diagonal elems are first, then if one
    list is longer than another the range is "shifted" and new diagonal is taken etc..

    ``long`` is consumed in consecutive chunks of ``len(short)`` items and
    each chunk is zipped with ``short`` from its beginning, so every element
    of ``long`` appears exactly once (as long as ``short`` is not empty).

    Parameters
    ----------
    long, short : List of int
        items to create new pairs from. For best results put longer one first.
        Both must support ``len()`` and slicing (lists, ranges, ...).

    Returns
    -------
    out : List of Tuple
        indices of items; empty when either input is empty

    Examples
    --------
    >>> get_different_elems(range(6), range(4))
    [(0, 0), (1, 1), (2, 2), (3, 3), (4, 0), (5, 1)]
    """
    chunk = len(short)
    # An empty ``short`` yields no pairs; without this guard the chunked walk
    # below would never advance.
    if chunk == 0:
        return []

    result = []
    # Iterative walk over ``long`` (rather than recursion) so that a long
    # input paired with a very short one cannot exhaust the call stack.
    for start in range(0, len(long), chunk):
        result.extend(zip(long[start:], short))
    return result

`connect_lists_by_order(arr1, arr2)`

Create list of pairs from 2 lists, where the diagonal elems are first, then come unused

Parameters:

Name	Type	Description	Default
`arr1`	`list`	arrays to connect	required
`arr2`	`list`	arrays to connect	required

Returns:

Name	Type	Description
`out`	`List[Tuple]`	connected lists

Source code in app/utils.py

def connect_lists_by_order(arr1, arr2):
    """
    Pair up the elements of two lists following the index order produced by
    ``generate_item_order`` (diagonal pairs first, then the unused ones)

    Parameters
    ----------
    arr1, arr2 : list
        arrays to connect

    Returns
    -------
    out : List[Tuple]
        ``(arr1[i], arr2[j])`` tuples, one per generated index pair

    """
    pairs = []
    for left_idx, right_idx in generate_item_order(len(arr1), len(arr2)):
        pairs.append((arr1[left_idx], arr2[right_idx]))
    return pairs

`remove_used_strings(to_filter, by)`

Remove all elements from one list that are substrings of any element of the second list

Parameters:

Name	Type	Description	Default
`to_filter`	`list of str`	strings to filter	required
`by`	`list of str`	strings to search for substrings in	required

Returns:

Name	Type	Description
`out`	`List[str]`	filtered strings

Source code in app/utils.py

def remove_used_strings(to_filter, by):
    """
    Keep only the strings that are not a substring of any string in ``by``

    The substring check is case sensitive (plain ``in`` on strings).

    Parameters
    ----------
    to_filter : list of str
        strings to filter
    by : list of str
        strings to search for substrings in

    Returns
    -------
    out : List[str]
        filtered strings. An empty list when ``to_filter`` is None;
        ``to_filter`` itself (unchanged) when ``by`` is None.
    """
    if to_filter is None:
        return []
    if by is None:
        return to_filter

    kept = []
    for candidate in to_filter:
        if not any(candidate in text for text in by):
            kept.append(candidate)
    return kept

`lowercase_phrase(phrase, exceptions)`

Lowercase the phrase except for the words in exceptions list

Parameters:

Name	Type	Description	Default
`phrase`	`str`	string to lowercase possibly consisting of several words	required
`exceptions`	`List[str]`	list of words that shouldn't be lowercased	required

Returns:

Name	Type	Description
`out`	`str`	processed string

Source code in app/utils.py

def lowercase_phrase(phrase, exceptions):
    """
    Lowercase every word of the phrase unless it is listed in exceptions

    The phrase is split on whitespace and re-joined with single spaces, so
    leading/trailing and repeated whitespace is not preserved. A word is
    kept as-is only on an exact (case-sensitive) match with an exception.

    Parameters
    ----------
    phrase : str
        string to lowercase possibly consisting of several words
    exceptions : List[str]
        list of words that shouldn't be lowercased

    Returns
    -------
    out : str
        processed string
    """
    result_words = []
    for token in phrase.split():
        if token in exceptions:
            result_words.append(token)
        else:
            result_words.append(token.lower())
    return " ".join(result_words)

`case_insensitive_intersection(list1, list2)`

Return intersection of two lists of string preserving the original order and case from the first list. The intersection is case insensitive

Parameters:

Name	Type	Description	Default
`list1`	`List of str`	lists to find the intersection of	required
`list2`	`List of str`	lists to find the intersection of	required

Returns:

Name	Type	Description
`out`	`List of str`	elements that are present in both lists in the order they appear in list1

Source code in app/utils.py

def case_insensitive_intersection(list1, list2):
    """
    Return intersection of two lists of string preserving the original order and case from the
    first list. The intersection is case insensitive

    Duplicates in ``list1`` are kept: every element of ``list1`` whose
    lowercase form occurs in ``list2`` is returned.

    Parameters
    ----------
    list1, list2 : List of str
        lists to find the intersection of

    Returns
    -------
    out : List of str
        elements that are present in both lists in the order they appear in list1;
        an empty list when either input is empty or None
    """
    if (not list1) or (not list2):
        return []
    # Build the lowercase lookup once as a set so each membership test is
    # constant time instead of a scan over list2.
    lowered_second = {item.lower() for item in list2}
    return [item for item in list1 if item.lower() in lowered_second]

`del_duplicates_ordered(seq, ignore_case=False)`

Remove duplicate elements from the sequence preserving their initial order

Parameters:

Name	Type	Description	Default
`seq`	`List`	elements to filter	required
`ignore_case`	`bool`	valid only for list of strings. Shows whether to treat uppercase/lowercase versions of the same string as duplicates	`False`

Returns:

Name	Type	Description
`out`	`List`	deduplicated list

Source code in app/utils.py

def del_duplicates_ordered(seq, ignore_case=False):
    """
    Remove duplicate elements from the sequence preserving their initial order

    The first occurrence of every element is kept. Elements are returned
    exactly as they were passed in: no type coercion happens for mixed-type
    input and nested sequences are compared as whole elements.

    Parameters
    ----------
    seq : List
        elements to filter
    ignore_case : bool
        valid only for list of strings. Shows whether to treat uppercase/lowercase versions of
        the same string as duplicates

    Returns
    -------
    out : List
        deduplicated list; empty when ``seq`` is empty or None
    """
    if not seq:
        return []

    seen_hashable = set()
    seen_unhashable = []  # linear-scan fallback for elements such as lists
    unique_items = []
    for item in seq:
        key = item.lower() if ignore_case else item
        try:
            if key in seen_hashable:
                continue
            seen_hashable.add(key)
        except TypeError:
            # Unhashable element: compare by equality against earlier ones.
            if key in seen_unhashable:
                continue
            seen_unhashable.append(key)
        unique_items.append(item)
    return unique_items

`generate_openapi_json(openapi_version='3.1.0')`

Generate and save openapi specification of the application

Parameters:

Name	Type	Description	Default
`openapi_version`	`str`	openapi version specifier	`'3.1.0'`

Returns:

Type	Description
`None`

Source code in app/utils.py

def generate_openapi_json(openapi_version="3.1.0"):
    """
    Generate and save openapi specification of the application

    Command-line arguments are parsed with ``ArgumentParser``:
    ``--app_path`` is the importable module path whose ``app`` attribute is
    the application, and ``--server_url`` (default ``0.0.0.0:8000``) is put
    into the ``servers`` section. The specification built by ``get_openapi``
    is written to ``openapi.json`` (relative path) through ``json_save``.

    Parameters
    ----------
    openapi_version : str
        openapi version specifier

    Returns
    -------
    None
    """
    cli = ArgumentParser()
    cli.add_argument('--app_path')
    cli.add_argument('--server_url', default='0.0.0.0:8000')
    options = cli.parse_args()

    # The target module is expected to expose the application as ``app``.
    application = importlib.import_module(options.app_path).app

    specification = get_openapi(
        title=application.title,
        openapi_version=openapi_version,
        version=application.version,
        description=application.description,
        routes=application.routes,
        servers=[{'url': options.server_url}]
    )
    json_save('openapi.json', specification)

`make_prompt(system_message, prompt)`

Make prompt from two separate parts

Parameters:

Name	Type	Description	Default
`system_message`	`List[Dict]`	The message to help generate text	required
`prompt`	`str`	Prompt with which to generate	required

Returns:

Name	Type	Description
`out`	`List[Dict]`

Source code in app/utils.py

def make_prompt(system_message: List[Dict], prompt: str) -> List[Dict]:
    """
    Make prompt from two separate parts

    The ``"content"`` entry of the second message (index 1) is overwritten
    with ``prompt``. The list is modified in place; no copy is made.

    Parameters
    ----------
    system_message: List[Dict]
        The message to help generate text; must hold at least two dicts
    prompt: str
        Prompt with which to generate

    Returns
    -------
    out : List[Dict]
        the same ``system_message`` object, now carrying the prompt
    """
    prompt_slot = system_message[1]
    prompt_slot["content"] = prompt
    return system_message

`replace_pattern(match)`

Replace patterns in a matched pattern with commas and spaces.

Parameters:

Name	Type	Description	Default
`match`	`Match`	A regular expression match object.	required

Returns:

Name	Type	Description
`out`	`str`

Source code in app/utils.py

def replace_pattern(match) -> str:
    """
    Replace patterns in a matched pattern with commas and spaces.

    The match must expose exactly four groups, read as
    (number, label, number, label) -- presumably bath/bedroom counts and
    abbreviations, judging by the original variable names; confirm against
    the calling regex. When both labels and at least one number are present
    the two halves are joined with ", "; any other combination falls back
    to a plain space-separated rendering of whatever groups matched.

    Parameters
    ----------
    match : re.Match
        A regular expression match object.

    Returns
    -------
    out : str
        the reformatted text for this match
    """
    first_num, first_label, second_num, second_label = match.groups()

    # Fallback: a label is missing, or neither half carries a number.
    if not (first_label and second_label and (first_num or second_num)):
        return f'{first_num or ""} {first_label or ""}{second_num or ""} {second_label or ""}'

    left_part = f'{first_num} {first_label}' if first_num else f'{first_label}'
    right_part = f'{second_num} {second_label}' if second_num else f'{second_label}'
    return f'{left_part}, {right_part}'