Skip to content

Services and schemas



Main business logic and API functionality

Services

Main business logic implementation

Source code in app/services.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
class Services:
    """
    Main business logic implementation

    Facade that owns a headline extractor, an ad copy generator, a feature extractor
    and a text generator, and exposes the operations built on top of them.
    """

    def __init__(self):
        """
        Create every component the service delegates to

        Builds the headline extractor, ad copy generator, feature extractor and text
        generator from the values exposed by ``variables`` and ``secrets``.
        """
        self.headline_extractor = HeadlineExtractor(patterns=variables.PATTERNS,
                                                    banned_words=variables.BANNED_WORDS,
                                                    locations=variables.US_LOCATIONS)
        self.ad_copy_generator = AdCopyGenerator(config=variables.AD_COPY_GENERATOR_CONFIG)
        self.feature_extractor = FeatureExtractor(patterns=variables.FEATURE_EXTRACTOR_PATTERNS)
        self.text_generator = TextGenerator(key=secrets.KEY, model=secrets.MODEL,
                                            examples=variables.INVITATION_TEXT,
                                            features_ranking=variables.FEATURES_RANKING)

    def extract_headlines(self, remark, min_length):
        """
        Extract headlines from remark

        The remark is run through ``preprocess_remark`` before extraction.

        Parameters
        ----------
        remark : str
            inputted remark
        min_length : int
            Minimum length of extracted headlines

        Returns
        -------
        headlines : list
            extracted headlines or empty list
        all_patterns
            second value produced by the headline extractor, returned unchanged
        """
        cleaned_remark = preprocess_remark(remark=remark)
        headlines, all_patterns = self.headline_extractor.extract_headlines(cleaned_remark,
                                                                            min_length)
        return headlines, all_patterns

    def generate_and_extract_patterns(self, remark, min_char, subtype, property_type, state,
                                      price=None, living_area=None):
        """
        Extract home-related patterns from given remark, generate new patterns from house info
        like price and add to the extracted ones

        Parameters
        ----------
        remark : str
            remark of a house
        min_char : int
            minimal length of extracted patterns
        subtype : int
            enum for subtype
        property_type : int
            enum for property type
        state : str
            location of the house
        price : float, int
            price of the house
        living_area : float, int
            area of the house

        Returns
        -------
        out : List
            extracted and generated patterns. Extracted ones are first.
        """
        cleaned_remark = preprocess_remark(remark)
        # Only the first value of the extractor result is used here.
        extracted_patterns, _ = self.headline_extractor.extract_headlines(remark=cleaned_remark,
                                                                          min_length=min_char)
        generated_patterns = self.ad_copy_generator.generate_patterns(subtype=subtype,
                                                                      property_type=property_type,
                                                                      state=state,
                                                                      price=price,
                                                                      living_area=living_area,
                                                                      min_char=min_char)
        return list(extracted_patterns) + generated_patterns

    def prepare_home_data(self, remark, min_char, state, subtype, property_type,
                          price=None, living_area=None, nlp_features=None):
        """
        Gather the inputs needed for ad copy generation: extracted and generated patterns,
        filtered and sorted nlp features, and the sentences of the remark

        Parameters
        ----------
        remark : str
            remark of a house
        min_char : int
            minimal length of extracted patterns
        state : str
            short name of the state where the house is
        subtype : int
            subtype of the house
        property_type : int
            property type of the house
        price : float
            price of the house
        living_area : float
            area of the house
        nlp_features : list of str
            additional features extracted with nlp service

        Returns
        -------
        out : List of str
            extracted and generated patterns
        out : List of str
            sorted and filtered home-related features
        out : List of str
            sentences extracted from the remark
        """
        all_patterns = self.generate_and_extract_patterns(remark=remark,
                                                          min_char=min_char,
                                                          subtype=subtype,
                                                          property_type=property_type,
                                                          state=state,
                                                          price=price,
                                                          living_area=living_area)
        # Sentences are listed from the remark exactly as passed in; preprocessing is
        # applied only inside generate_and_extract_patterns.
        sentences = self.headline_extractor.list_sentences(remark=remark)
        sorted_features = self.ad_copy_generator.filter_sort_nlp_features(
            nlp_features=nlp_features, state=state, subtype=subtype, property_type=property_type)

        return all_patterns, sorted_features, sentences

    def generate_all_headlines(self, remark, min_char, min_headline, state, subtype, property_type,
                               price=None, living_area=None, living_area_unit=None,
                               nlp_features=None, max_headline=40, max_n=3):
        """
        Generate a list of headlines based on house info by extracting descriptive patterns
        and/or adding additional feature info if needed

        Parameters
        ----------
        remark : str
            remark of a house
        min_char : int
            minimal length of extracted patterns
        min_headline : int
            minimal length of a headline. If there are no patterns of this length,
            other arguments will be used to create a longer headline
        state : str
            short name of the state where the house is
        subtype : int
            subtype of the house
        property_type : int
            property type of the house
        price : float
            price of the house
        living_area : float
            area of the house
        living_area_unit : str
            the unit in which the area is measured
        nlp_features : list of str
            additional features extracted with nlp service
        max_headline : int
            maximal length of the headline
        max_n : int
            accepted for signature parity with the other headline methods; it is not
            used here, so the returned headlines are not truncated

        Returns
        -------
        all_patterns : list of str
            patterns that passed the ad copy generator's validity check
        headlines : list of str
            headlines generated by ad copy generator using extracted patterns and home data
        sorted_nlp_features : list of str
            sorted nlp features of the house
        sents : list
            sentences extracted from the remark of the house
        """
        all_patterns, sorted_features, sents = self.prepare_home_data(remark=remark,
                                                                      min_char=min_char,
                                                                      subtype=subtype,
                                                                      property_type=property_type,
                                                                      state=state,
                                                                      price=price,
                                                                      living_area=living_area,
                                                                      nlp_features=nlp_features)

        is_valid = self.ad_copy_generator.is_valid_pattern
        all_patterns = [pattern for pattern in all_patterns if is_valid(pattern)]
        headlines = self.ad_copy_generator.generate_headlines(patterns=all_patterns,
                                                              min_headline=min_headline,
                                                              subtype=subtype,
                                                              property_type=property_type,
                                                              living_area=living_area,
                                                              living_area_unit=living_area_unit,
                                                              sorted_nlp_features=sorted_features,
                                                              max_headline=max_headline)
        headlines = list(headlines)  # sometimes filter obj is returned which isn't subscriptable
        return all_patterns, headlines, sorted_features, sents

    def generate_headlines(self, remark, min_char, min_headline, state, subtype, property_type,
                           price=None, living_area=None, living_area_unit=None,
                           nlp_features=None, max_headline=40, max_n=3):
        """
        Generate a list of headlines based on house info by extracting descriptive patterns
        and/or adding additional feature info if needed

        Parameters
        ----------
        remark : str
            remark of a house
        min_char : int
            minimal length of extracted patterns
        min_headline : int
            minimal length of a headline. If there are no patterns of this length,
            other arguments will be used to create a longer headline
        state : str
            short name of the state where the house is
        subtype : int
            subtype of the house
        property_type : int
            property type of the house
        price : float
            price of the house
        living_area : float
            area of the house
        living_area_unit : str
            the unit in which the area is measured
        nlp_features : list of str
            additional features extracted with nlp service
        max_headline : int
            maximal length of the headline
        max_n : int
            maximal number of returned headlines

        Returns
        -------
        out : list of str
            the first ``max_n`` generated headlines
        """
        _, headlines, _, _ = self.generate_all_headlines(remark=remark,
                                                         min_char=min_char,
                                                         subtype=subtype,
                                                         property_type=property_type,
                                                         state=state,
                                                         price=price,
                                                         living_area=living_area,
                                                         nlp_features=nlp_features,
                                                         min_headline=min_headline,
                                                         living_area_unit=living_area_unit,
                                                         max_headline=max_headline)
        return headlines[:max_n]

    def sentencize(self, text: str):
        """
        Split input text into sentences using the headline extractor

        Parameters
        ----------
        text : str
            input text

        Returns
        --------
        out : list[str]
            list of sentences
        """
        return self.headline_extractor.list_sentences(remark=text)

    def generate_primary_text(self, patterns, subtype, property_type, city, state,
                              neighborhood=None, county=None, bedrooms=None, bathrooms=None,
                              nlp_features=None, exterior_style=None, image_tags=None):
        """
        Generate primary text for given house's ad based on available info

        Parameters
        ----------
        patterns : List[str]
            home-related textual patterns to use in the primary text
        subtype : int
            enum of the subtype
        property_type : int
            enum of the property_type
        city, state, neighborhood, county : str
            location of the house
        bedrooms : int
            number of bedrooms
        bathrooms : int
            number of bathrooms
        nlp_features : List of str
            features extracted from the remark via nlp-service
        image_tags : List of str
            features extracted from images via CNN networks
        exterior_style : str
            Architectural style of the house

        Returns
        --------
        out : str
            primary text for ad copy
        """
        # The raw nlp features are filtered and sorted before being handed to the generator.
        sorted_features = self.ad_copy_generator.filter_sort_nlp_features(
            nlp_features=nlp_features, state=state, subtype=subtype, property_type=property_type)

        return self.ad_copy_generator.generate_primary_text(subtype=subtype,
                                                            property_type=property_type,
                                                            city=city,
                                                            state=state,
                                                            neighborhood=neighborhood,
                                                            county=county,
                                                            bedrooms=bedrooms,
                                                            bathrooms=bathrooms,
                                                            sorted_nlp_features=sorted_features,
                                                            exterior_style=exterior_style,
                                                            image_tags=image_tags,
                                                            patterns=patterns)

    def generate_facebook_ad_copy(self, remark, min_char, min_headline, state, city, subtype, property_type,
                                  price=None, neighborhood=None, county=None, living_area=None,
                                  living_area_unit=None, nlp_features=None, max_headline=40, max_n=3,
                                  bedrooms=None, bathrooms=None, exterior_style=None, image_tags=None,
                                  first_sentence_only=True):
        """
        Generate a list of headlines as well as a primary text from given house data

        Parameters
        ----------
        remark : str
            remark of a house
        min_char : int
            minimal length of extracted patterns
        min_headline : int
            minimal length of a headline. If there are no patterns of this length,
            other arguments will be used to create a longer headline
        state, city, neighborhood, county : str
            location of the house
        subtype : int
            subtype of the house
        property_type : int
            property type of the house
        price : float
            price of the house
        living_area : float
            area of the house
        living_area_unit : str
            the unit in which the area is measured
        nlp_features : list of str
            additional features extracted with nlp service
        max_headline : int
            maximal length of the headline
        max_n : int
            maximal number of returned headlines
        bedrooms : int
            number of bedrooms
        bathrooms : int
            number of bathrooms
        exterior_style : str
            Architectural style of the house
        image_tags : List of str
            features of the house extracted from images
        first_sentence_only : bool
            whether to return only the first sentence of the primary text

        Returns
        -------
        out : List of str
            generated headlines
        out : str
            generated primary text
        """
        all_patterns, headlines, sorted_features, sents = \
            self.generate_all_headlines(
                remark=remark,
                min_char=min_char,
                subtype=subtype,
                property_type=property_type,
                state=state,
                price=price,
                living_area=living_area,
                nlp_features=nlp_features,
                min_headline=min_headline,
                living_area_unit=living_area_unit,
                max_headline=max_headline)

        headlines = headlines[:max_n]

        # Patterns already used by the returned headlines are filtered out before they
        # are offered to the primary text.
        unused_patterns = remove_used_strings(to_filter=all_patterns,
                                              by=headlines)

        # Both generator entry points take the same description of the house.
        text_kwargs = dict(subtype=subtype,
                           property_type=property_type,
                           city=city,
                           state=state,
                           neighborhood=neighborhood,
                           county=county,
                           bedrooms=bedrooms,
                           bathrooms=bathrooms,
                           sorted_nlp_features=sorted_features,
                           exterior_style=exterior_style,
                           image_tags=image_tags,
                           patterns=unused_patterns)

        if first_sentence_only:
            # generate_describing_sentence yields a pair; only its first item is the text.
            primary_text, _ = self.ad_copy_generator.generate_describing_sentence(**text_kwargs)
        else:
            primary_text = self.ad_copy_generator.generate_primary_text(all_sentences=sents,
                                                                        **text_kwargs)

        return headlines, primary_text

    async def generate_google_ad_copy(self,
                                      generation_params: GenerationParams,
                                      postprocessing_params: PostprocessingParams,
                                      home_data: GoogleAdCopyHomeData,
                                      min_char: int = 1) -> Tuple[Union[GenerateGoogleAdCopyHeadlines, None], float]:
        """
        Generate short headlines, long headlines, and descriptions.

        ``home_data`` is updated in place: a ``bedrooms`` or ``bathrooms`` value of 0 is
        replaced with ``None`` before generation.

        Three paths exist, selected by ``home_data.subtype``:

        * 9 - the text generator is called with the home data only;
        * 11 - the text generator additionally receives important and unimportant features;
        * anything else - generated adjectives and a ``limited_data`` flag are passed as well.

        Parameters
        ----------
        generation_params: GenerationParams
            Schema for text generation params
        postprocessing_params: PostprocessingParams
            Schema for postprocessing params
        home_data: GoogleAdCopyHomeData
            Schema of a property data.
        min_char : Optional[int]
            minimal length of extracted patterns, positive number

        Returns
        -------
        out : Tuple[Union[GenerateGoogleAdCopyHeadlines, None], float]
            the result of the text generator, returned unchanged
        """
        if home_data.bedrooms == 0:
            home_data.bedrooms = None

        if home_data.bathrooms == 0:
            home_data.bathrooms = None

        common_kwargs = dict(generation_params=generation_params,
                             postprocessing_params=postprocessing_params,
                             home_data=home_data)

        # NOTE(review): the meaning of subtype 9 and 11 is not visible in this class - confirm
        # against the subtype enum.
        if home_data.subtype == 9:
            return await self.text_generator.generate_google_ad_copy(**common_kwargs)

        if home_data.features:
            sorted_features = self.ad_copy_generator.filter_sort_nlp_features(nlp_features=home_data.features,
                                                                              state=home_data.state,
                                                                              subtype=home_data.subtype,
                                                                              property_type=home_data.property_type)
            # dict.fromkeys removes case-insensitive duplicates while keeping first-seen
            # order; a set here would make the order differ from run to run.
            unique_features = dict.fromkeys(feature.lower() for feature in home_data.features)
            unimportant_features = [feature for feature in unique_features
                                    if feature not in sorted_features]
        else:
            sorted_features = None
            unimportant_features = None

        common_kwargs.update(important_features=sorted_features,
                             unimportant_features=unimportant_features)

        if home_data.subtype == 11:
            return await self.text_generator.generate_google_ad_copy(**common_kwargs)

        generated_phrases = self.ad_copy_generator.generate_patterns(subtype=home_data.subtype,
                                                                     property_type=home_data.property_type,
                                                                     state=home_data.state,
                                                                     price=home_data.price,
                                                                     living_area=home_data.living_area,
                                                                     min_char=min_char)
        # The first word of each generated phrase is looked up in the adjective mapping;
        # phrases whose first word is not mapped contribute None.
        generated_adjectives = [ADJECTIVES_MAPPING.get(phrase.split()[0]) for phrase in generated_phrases]

        # True only when features, remark, bedrooms and bathrooms are all missing.
        limited_data = (home_data.features is None and home_data.remark is None and
                        home_data.bedrooms is None and home_data.bathrooms is None)

        return await self.text_generator.generate_google_ad_copy(
            generated_adjectives=generated_adjectives,
            limited_data=limited_data,
            **common_kwargs)

    def extract_features(self, remark):
        """
        Extract features from remark

        The remark is preprocessed and lower-cased before it reaches the feature extractor.

        Parameters
        ----------
        remark : str
            inputted remark

        Returns
        -------
        features : list
            extracted features or empty list

        """
        normalized_remark = preprocess_remark(remark=remark).lower()
        return self.feature_extractor.extract_features(normalized_remark)

    def return_unique_features(self):
        """
        Return unique features

        Returns
        -------
        unique_features : list
            unique features reported by the feature extractor

        """
        return self.feature_extractor.return_unique_features()

    def generate_invitation_text(self, collection_data, client_name, agent_name):
        """
        Generate client invitation text

        Parameters
        ----------
        collection_data : list
            Properties data in collection
        client_name : str
            Client name
        agent_name : str
            Agent name

        Returns
        -------
        greeting : str
            Client greeting part of email
        system_introduction : str
            Introduction of system
        collection_information : str
            Collection information generated by ChatGPT
        closing : str
            Closing part of email
        full_text : str
            Email full text
        status_code : int
            status code of ChatGPT call
        message : str
            message of ChatGPT call
        """
        # Unpacking keeps the seven-item contract explicit for callers.
        greeting, system_introduction, collection_information, closing, full_text, status_code, message = \
            self.text_generator.generate_invitation(collection_data, client_name, agent_name)

        return greeting, system_introduction, collection_information, closing, full_text, status_code, \
            message

    def generate_location_based_text(self, location, words_count):
        """
        Generate text and title for the given location

        Parameters
        ----------
        location : str
            a location in the USA
        words_count : int
            the approximate number of words in generated text

        Returns
        -------
        text_title : str
            generated text and title
        status_code : int
            status_code of ChatGPT response
        message : str
            message of ChatGPT response
        """
        text_title, status_code, message = self.text_generator.generate_location_based_text(
            location, words_count)
        return text_title, status_code, message

__init__()

Initialize the headline extractor, ad copy generator, feature extractor and text generator

Source code in app/services.py
def __init__(self):
    """
    Build the four components this service delegates to: the headline extractor,
    the ad copy generator, the feature extractor and the text generator.
    """
    self.headline_extractor = HeadlineExtractor(
        patterns=variables.PATTERNS,
        banned_words=variables.BANNED_WORDS,
        locations=variables.US_LOCATIONS,
    )
    self.ad_copy_generator = AdCopyGenerator(
        config=variables.AD_COPY_GENERATOR_CONFIG,
    )
    self.feature_extractor = FeatureExtractor(
        patterns=variables.FEATURE_EXTRACTOR_PATTERNS,
    )
    self.text_generator = TextGenerator(
        key=secrets.KEY,
        model=secrets.MODEL,
        examples=variables.INVITATION_TEXT,
        features_ranking=variables.FEATURES_RANKING,
    )

extract_headlines(remark, min_length)

Extract headlines from remark

Parameters:

Name Type Description Default
remark str

inputted remark

required
min_length int

Minimum length of extracted headlines

required

Returns:

Name Type Description
headlines list

extracted headlines or empty list

Source code in app/services.py
def extract_headlines(self, remark, min_length):
    """
    Preprocess the remark and run the headline extractor on it

    Parameters
    ----------
    remark : str
        inputted remark
    min_length : int
        Minimum length of extracted headlines

    Returns
    -------
    headlines : list
        extracted headlines or empty list
    all_patterns
        second value produced by the headline extractor, returned unchanged
    """
    cleaned_remark = preprocess_remark(remark=remark)
    extracted, every_pattern = self.headline_extractor.extract_headlines(
        cleaned_remark, min_length
    )
    return extracted, every_pattern

generate_and_extract_patterns(remark, min_char, subtype, property_type, state, price=None, living_area=None)

Extract home-related patterns from given remark, generate new patterns from house info like price and add to the extracted ones

Parameters:

Name Type Description Default
remark str

remark of a house

required
min_char int

minimal length of extracted patterns

required
subtype int

enum for subtype

required
property_type int

enum for property type

required
state str

location of the house

required
price (float, int)

price of the house

None
living_area (float, int)

area of the house

None

Returns:

Name Type Description
out List

extracted and generated patterns. Extracted ones are first.

Source code in app/services.py
def generate_and_extract_patterns(self, remark, min_char, subtype, property_type, state,
                                  price=None, living_area=None):
    """
    Collect patterns for a house: the ones extracted from its remark followed by
    the ones generated from structured house info (price, living area, ...)

    Parameters
    ----------
    remark : str
        remark of a house; preprocessed with ``preprocess_remark`` before extraction
    min_char : int
        minimal length of extracted patterns; also forwarded to the pattern generator
    subtype : int
        enum for subtype
    property_type : int
        enum for property type
    state : str
        location of the house
    price : float, int
        price of the house
    living_area : float, int
        area of the house

    Returns
    -------
    out : List
        extracted and generated patterns. Extracted ones are first.
    """
    cleaned_remark = preprocess_remark(remark)
    extraction = self.headline_extractor.extract_headlines(remark=cleaned_remark,
                                                           min_length=min_char)
    # Only the first element of the extractor output is used here
    extracted_patterns, _ = extraction

    pattern_generator = self.ad_copy_generator
    generated_patterns = pattern_generator.generate_patterns(subtype=subtype,
                                                             property_type=property_type,
                                                             state=state,
                                                             price=price,
                                                             living_area=living_area,
                                                             min_char=min_char)
    return list(extracted_patterns) + generated_patterns

prepare_home_data(remark, min_char, state, subtype, property_type, price=None, living_area=None, nlp_features=None)

Generate a list of headlines based on house info by extracting descriptive patterns and/or adding additional feature info if needed

Parameters:

Name Type Description Default
remark str

remark of a house

required
min_char int

minimal length of extracted patterns

required
state str

short name of the state where the house is

required
subtype int

subtype of the house

required
property_type int

property type of the house

required
price float

price of the house

None
living_area float

area of the house

None
nlp_features list of str

additional features extracted with nlp service

None

Returns:

Name Type Description
out List of str

generated headlines

out List of str

sorted and filtered home-related features

out List of str

sentences extracted from the remark

Source code in app/services.py
def prepare_home_data(self, remark, min_char, state, subtype, property_type,
                      price=None, living_area=None, nlp_features=None):
    """
    Gather the raw material used for ad copy generation: patterns, filtered/sorted
    nlp features and the sentences of the remark

    Parameters
    ----------
    remark : str
        remark of a house
    min_char : int
        minimal length of extracted patterns
    state : str
        short name of the state where the house is
    subtype : int
        subtype of the house
    property_type : int
        property type of the house
    price : float
        price of the house
    living_area : float
        area of the house
    nlp_features : list of str
        additional features extracted with nlp service

    Returns
    -------
    out : List of str
        extracted and generated patterns (result of ``generate_and_extract_patterns``)
    out : List of str
        sorted and filtered home-related features
    out : List of str
        sentences extracted from the remark
    """
    pattern_args = dict(remark=remark,
                        min_char=min_char,
                        subtype=subtype,
                        property_type=property_type,
                        state=state,
                        price=price,
                        living_area=living_area)
    patterns = self.generate_and_extract_patterns(**pattern_args)

    # Sentences are listed from the remark exactly as it was passed in
    remark_sentences = self.headline_extractor.list_sentences(remark=remark)

    ranked_features = self.ad_copy_generator.filter_sort_nlp_features(
        nlp_features=nlp_features,
        state=state,
        subtype=subtype,
        property_type=property_type)

    return patterns, ranked_features, remark_sentences

generate_all_headlines(remark, min_char, min_headline, state, subtype, property_type, price=None, living_area=None, living_area_unit=None, nlp_features=None, max_headline=40, max_n=3)

Generate a list of headlines based on house info by extracting descriptive patterns and/or adding additional feature info if needed

Parameters:

Name Type Description Default
remark str

remark of a house

required
min_char int

minimal length of extracted patterns

required
min_headline int

minimal length of a headline. If there are no patterns of this length, other arguments will be used to create a longer headline

required
state str

short name of the state where the house is

required
subtype int

subtype of the house

required
property_type int

property type of the house

required
price float

price of the house

None
living_area float

area of the house

None
living_area_unit str

the unit in which the area is measured

None
nlp_features list of str

additional features extracted with nlp service

None
max_headline int

maximal length of the headline

40
max_n int

maximal number of returned headlines

3

Returns:

Name Type Description
all_patterns list of str

all patterns extracted by headline extractor

headlines list of str

headlines generated by ad copy generator using extracted patterns and home data

sorted_nlp_features list of str

sorted nlp features of the house

sents list

sentences extracted from the remark of the house

Source code in app/services.py
def generate_all_headlines(self, remark, min_char, min_headline, state, subtype, property_type,
                           price=None, living_area=None, living_area_unit=None,
                           nlp_features=None, max_headline=40, max_n=3):
    """
    Prepare home data, keep only the valid patterns and generate every headline
    the ad copy generator can produce from them

    Parameters
    ----------
    remark : str
        remark of a house
    min_char : int
        minimal length of extracted patterns
    min_headline : int
        minimal length of a headline. If there are no patterns of this length,
        other arguments will be used to create a longer headline
    state : str
        short name of the state where the house is
    subtype : int
        subtype of the house
    property_type : int
        property type of the house
    price : float
        price of the house
    living_area : float
        area of the house
    living_area_unit : str
        the unit in which the area is measured
    nlp_features : list of str
        additional features extracted with nlp service
    max_headline : int
        maximal length of the headline
    max_n : int
        maximal number of returned headlines. Accepted for signature parity but
        not used in this method: the headline list is returned untruncated

    Returns
    -------
    all_patterns : list of str
        patterns from ``prepare_home_data`` that pass ``is_valid_pattern``
    headlines : list of str
        headlines generated by ad copy generator using extracted patterns and home data
    sorted_nlp_features : list of str
        sorted nlp features of the house
    sents : list
        sentences extracted from the remark of the house
    """
    home_data = self.prepare_home_data(remark=remark,
                                       min_char=min_char,
                                       subtype=subtype,
                                       property_type=property_type,
                                       state=state,
                                       price=price,
                                       living_area=living_area,
                                       nlp_features=nlp_features)
    raw_patterns, sorted_features, sents = home_data

    is_valid = self.ad_copy_generator.is_valid_pattern
    valid_patterns = [pattern for pattern in raw_patterns if is_valid(pattern)]

    generated = self.ad_copy_generator.generate_headlines(patterns=valid_patterns,
                                                          min_headline=min_headline,
                                                          subtype=subtype,
                                                          property_type=property_type,
                                                          living_area=living_area,
                                                          living_area_unit=living_area_unit,
                                                          sorted_nlp_features=sorted_features,
                                                          max_headline=max_headline)
    # The generator may hand back a lazy filter object; materialise it so callers can slice
    headlines = list(generated)
    return valid_patterns, headlines, sorted_features, sents

generate_headlines(remark, min_char, min_headline, state, subtype, property_type, price=None, living_area=None, living_area_unit=None, nlp_features=None, max_headline=40, max_n=3)

Generate a list of headlines based on house info by extracting descriptive patterns and/or adding additional feature info if needed

Parameters:

Name Type Description Default
remark str

remark of a house

required
min_char int

minimal length of extracted patterns

required
min_headline int

minimal length of a headline. If there are no patterns of this length, other arguments will be used to create a longer headline

required
state str

short name of the state where the house is

required
subtype int

subtype of the house

required
property_type int

property type of the house

required
price float

price of the house

None
living_area float

area of the house

None
living_area_unit str

the unit in which the area is measured

None
nlp_features list of str

additional features extracted with nlp service

None
max_headline int

maximal length of the headline

40
max_n int

maximal number of returned headlines

3

Returns:

Name Type Description
out list of str

generated headlines

Source code in app/services.py
def generate_headlines(self, remark, min_char, min_headline, state, subtype, property_type,
                       price=None, living_area=None, living_area_unit=None,
                       nlp_features=None, max_headline=40, max_n=3):
    """
    Generate headlines for a house and return at most ``max_n`` of them

    Delegates to ``generate_all_headlines`` and keeps only the headline list.

    Parameters
    ----------
    remark : str
        remark of a house
    min_char : int
        minimal length of extracted patterns
    min_headline : int
        minimal length of a headline. If there are no patterns of this length,
        other arguments will be used to create a longer headline
    state : str
        short name of the state where the house is
    subtype : int
        subtype of the house
    property_type : int
        property type of the house
    price : float
        price of the house
    living_area : float
        area of the house
    living_area_unit : str
        the unit in which the area is measured
    nlp_features : list of str
        additional features extracted with nlp service
    max_headline : int
        maximal length of the headline
    max_n : int
        maximal number of returned headlines (applied here by slicing)

    Returns
    -------
    out : list of str
        the first ``max_n`` generated headlines
    """
    results = self.generate_all_headlines(remark=remark,
                                          min_char=min_char,
                                          subtype=subtype,
                                          property_type=property_type,
                                          state=state,
                                          price=price,
                                          living_area=living_area,
                                          nlp_features=nlp_features,
                                          min_headline=min_headline,
                                          living_area_unit=living_area_unit,
                                          max_headline=max_headline)
    # Only the second of the four returned values is needed here
    _, every_headline, _, _ = results
    return every_headline[:max_n]

sentencize(text)

Sentencize input text using spacy

Parameters:

Name Type Description Default
text str

input text

required

Returns:

Name Type Description
out list[str]

list of sentences

Source code in app/services.py
def sentencize(self, text: str):
    """
    Split input text into sentences

    Delegates to the headline extractor's ``list_sentences`` (which, per the
    existing documentation, relies on spacy).

    Parameters
    ----------
    text : str
        input text

    Returns
    -------
    out : list[str]
        list of sentences
    """
    sentences = self.headline_extractor.list_sentences(remark=text)
    return sentences

generate_primary_text(patterns, subtype, property_type, city, state, neighborhood=None, county=None, bedrooms=None, bathrooms=None, nlp_features=None, exterior_style=None, image_tags=None)

Generate primary text for given house's ad based on available info

Parameters:

Name Type Description Default
patterns List[str]

home-related textual patterns to use in the primary text

required
subtype int

enum of the subtype

required
property_type int

enum of the property_type

required
city str

location of the house

required
state str

location of the house

required
neighborhood str

location of the house

None
county str

location of the house

None
bedrooms int

number of bedrooms

None
bathrooms int

number of bathrooms

None
nlp_features List of str

features extracted from the remark via nlp-service

None
image_tags List of str

features extracted from images via CNN networks

None
exterior_style str

Architectural style of the house

None

Returns:

Name Type Description
out str

primary text for ad copy

Source code in app/services.py
def generate_primary_text(self, patterns, subtype, property_type, city, state,
                          neighborhood=None, county=None, bedrooms=None, bathrooms=None,
                          nlp_features=None, exterior_style=None, image_tags=None):
    """
    Build the primary text of a house's ad from the available info

    The nlp features are first filtered and sorted, then everything is handed to
    the ad copy generator.

    Parameters
    ----------
    patterns : List[str]
        home-related textual patterns to use in the primary text
    subtype : int
        enum of the subtype
    property_type : int
        enum of the property_type
    city, state, neighborhood, county : str
        location of the house (``neighborhood`` and ``county`` default to None)
    bedrooms : int
        number of bedrooms
    bathrooms : int
        number of bathrooms
    nlp_features : List of str
        features extracted from the remark via nlp-service
    exterior_style : str
        Architectural style of the house
    image_tags : List of str
        features extracted from images via CNN networks

    Returns
    -------
    out : str
        primary text for ad copy
    """
    copy_generator = self.ad_copy_generator
    ranked_features = copy_generator.filter_sort_nlp_features(nlp_features=nlp_features,
                                                              state=state,
                                                              subtype=subtype,
                                                              property_type=property_type)

    return copy_generator.generate_primary_text(subtype=subtype,
                                                property_type=property_type,
                                                city=city,
                                                state=state,
                                                neighborhood=neighborhood,
                                                county=county,
                                                bedrooms=bedrooms,
                                                bathrooms=bathrooms,
                                                sorted_nlp_features=ranked_features,
                                                exterior_style=exterior_style,
                                                image_tags=image_tags,
                                                patterns=patterns)

generate_facebook_ad_copy(remark, min_char, min_headline, state, city, subtype, property_type, price=None, neighborhood=None, county=None, living_area=None, living_area_unit=None, nlp_features=None, max_headline=40, max_n=3, bedrooms=None, bathrooms=None, exterior_style=None, image_tags=None, first_sentence_only=True)

Generate a list of headlines as well as a primary text from given house data

Parameters:

Name Type Description Default
remark str

remark of a house

required
min_char int

minimal length of extracted patterns

required
min_headline int

minimal length of a headline. If there are no patterns of this length, other arguments will be used to create a longer headline

required
state str

location of the house

required
city str

location of the house

required
neighborhood str

location of the house

None
county str

location of the house

None
subtype int

subtype of the house

required
property_type int

property type of the house

required
price float

price of the house

None
living_area float

area of the house

None
living_area_unit str

the unit in which the area is measured

None
nlp_features list of str

additional features extracted with nlp service

None
max_headline int

maximal length of the headline

40
max_n int

maximal number of returned headlines

3
bedrooms int

number of bedrooms

None
bathrooms int

number of bathrooms

None
exterior_style str

Architectural style of the house

None
image_tags List of str

features of the house extracted from images

None
first_sentence_only bool

whether to return only the first sentence of the primary text

True

Returns:

Name Type Description
out List of str

generated headlines

out str

generated primary text

Source code in app/services.py
def generate_facebook_ad_copy(self, remark, min_char, min_headline, state, city, subtype, property_type,
                              price=None, neighborhood=None, county=None, living_area=None,
                              living_area_unit=None, nlp_features=None, max_headline=40, max_n=3,
                              bedrooms=None, bathrooms=None, exterior_style=None, image_tags=None,
                              first_sentence_only=True):
    """
    Produce the headlines and the primary text of a Facebook ad for a house

    Patterns already used by the returned headlines are removed before the
    primary text is generated.

    Parameters
    ----------
    remark : str
        remark of a house
    min_char : int
        minimal length of extracted patterns
    min_headline : int
        minimal length of a headline. If there are no patterns of this length,
        other arguments will be used to create a longer headline
    state, city, neighborhood, county : str
        location of the house (``neighborhood`` and ``county`` default to None)
    subtype : int
        subtype of the house
    property_type : int
        property type of the house
    price : float
        price of the house
    living_area : float
        area of the house
    living_area_unit : str
        the unit in which the area is measured
    nlp_features : list of str
        additional features extracted with nlp service
    max_headline : int
        maximal length of the headline
    max_n : int
        maximal number of returned headlines
    bedrooms : int
        number of bedrooms
    bathrooms : int
        number of bathrooms
    exterior_style : str
        Architectural style of the house
    image_tags : List of str
        features of the house extracted from images
    first_sentence_only : bool
        whether to return only the first sentence of the primary text

    Returns
    -------
    out : List of str
        generated headlines (at most ``max_n``)
    out : str
        generated primary text
    """
    all_patterns, every_headline, sorted_features, sents = self.generate_all_headlines(
        remark=remark,
        min_char=min_char,
        subtype=subtype,
        property_type=property_type,
        state=state,
        price=price,
        living_area=living_area,
        nlp_features=nlp_features,
        min_headline=min_headline,
        living_area_unit=living_area_unit,
        max_headline=max_headline)

    headlines = every_headline[:max_n]
    unused_patterns = remove_used_strings(to_filter=all_patterns, by=headlines)

    # Arguments shared by both primary-text flavours
    text_kwargs = dict(subtype=subtype,
                       property_type=property_type,
                       city=city,
                       state=state,
                       neighborhood=neighborhood,
                       county=county,
                       bedrooms=bedrooms,
                       bathrooms=bathrooms,
                       sorted_nlp_features=sorted_features,
                       exterior_style=exterior_style,
                       image_tags=image_tags,
                       patterns=unused_patterns)

    if not first_sentence_only:
        # Full text additionally receives the sentences of the remark
        primary_text = self.ad_copy_generator.generate_primary_text(all_sentences=sents,
                                                                    **text_kwargs)
        return headlines, primary_text

    # The describing-sentence call returns a pair; only its first element is kept
    primary_text, _ = self.ad_copy_generator.generate_describing_sentence(**text_kwargs)
    return headlines, primary_text

generate_google_ad_copy(generation_params, postprocessing_params, home_data, min_char=1) async

Generate short headlines, long headlines, and descriptions.

Parameters:

Name Type Description Default
generation_params GenerationParams

Schema for text generation params

required
postprocessing_params PostprocessingParams

Schema for postprocessing params

required
home_data GoogleAdCopyHomeData

Schema of a property data.

required
min_char Optional[int]

minimal length of extracted patterns, positive number

1

Returns:

Name Type Description
out Tuple[Union[GenerateGoogleAdCopyHeadlines, None], float]
Source code in app/services.py
async def generate_google_ad_copy(self,
                                  generation_params: GenerationParams,
                                  postprocessing_params: PostprocessingParams,
                                  home_data: GoogleAdCopyHomeData,
                                  min_char: int = 1) -> Tuple[Union[GenerateGoogleAdCopyHeadlines, None], float]:
    """
    Generate short headlines, long headlines, and descriptions.

    Depending on ``home_data.subtype`` the text generator is called with a
    different amount of context: subtype 9 gets only the home data, subtype 11
    additionally gets the important/unimportant features, and every other
    subtype also gets generated adjectives and a ``limited_data`` flag.
    NOTE(review): the meaning of the subtype codes 9 and 11 is not visible here.

    Note that ``home_data`` is modified in place: a bedroom or bathroom count
    equal to 0 is replaced with None.

    Parameters
    ----------
    generation_params: GenerationParams
        Schema for text generation params
    postprocessing_params: PostprocessingParams
        Schema for postprocessing params
    home_data: GoogleAdCopyHomeData
        Schema of a property data.
    min_char : Optional[int]
        minimal length of extracted patterns, positive number

    Returns
    -------
    out : Tuple[Union[GenerateGoogleAdCopyHeadlines, None], float]
        whatever the text generator's ``generate_google_ad_copy`` returns
    """
    # Zero room counts are replaced with None before anything else happens
    if home_data.bedrooms == 0:
        home_data.bedrooms = None
    if home_data.bathrooms == 0:
        home_data.bathrooms = None

    base_kwargs = dict(generation_params=generation_params,
                       postprocessing_params=postprocessing_params,
                       home_data=home_data)

    if home_data.subtype == 9:
        return await self.text_generator.generate_google_ad_copy(**base_kwargs)

    important_features = None
    unimportant_features = None
    if home_data.features:
        important_features = self.ad_copy_generator.filter_sort_nlp_features(
            nlp_features=home_data.features,
            state=home_data.state,
            subtype=home_data.subtype,
            property_type=home_data.property_type)
        # Lower-cased, de-duplicated features that did not survive filtering/sorting
        lowered_features = list({feature.lower() for feature in home_data.features})
        unimportant_features = [feature for feature in lowered_features
                                if feature not in important_features]

    feature_kwargs = dict(base_kwargs,
                          important_features=important_features,
                          unimportant_features=unimportant_features)

    if home_data.subtype == 11:
        return await self.text_generator.generate_google_ad_copy(**feature_kwargs)

    generated_phrases = self.ad_copy_generator.generate_patterns(
        subtype=home_data.subtype,
        property_type=home_data.property_type,
        state=home_data.state,
        price=home_data.price,
        living_area=home_data.living_area,
        min_char=min_char)
    # The first word of each generated phrase is looked up in ADJECTIVES_MAPPING
    # (phrases whose first word is missing from the mapping contribute None)
    generated_adjectives = [ADJECTIVES_MAPPING.get(phrase.split()[0])
                            for phrase in generated_phrases]

    # True only when no features, remark, bedrooms or bathrooms are available
    limited_data = (home_data.features is None and home_data.remark is None
                    and home_data.bedrooms is None and home_data.bathrooms is None)

    return await self.text_generator.generate_google_ad_copy(
        generated_adjectives=generated_adjectives,
        limited_data=limited_data,
        **feature_kwargs)

extract_features(remark)

Extract features from remark

Parameters:

Name Type Description Default
remark str

inputted remark

required

Returns:

Name Type Description
features list

extracted features or empty list

Source code in app/services.py
def extract_features(self, remark):
    """
    Extract features from a remark

    The remark is preprocessed with ``preprocess_remark`` and lower-cased before
    it is given to the feature extractor.

    Parameters
    ----------
    remark : str
        inputted remark

    Returns
    -------
    features : list
        extracted features or empty list

    """
    normalized_remark = preprocess_remark(remark=remark).lower()
    return self.feature_extractor.extract_features(normalized_remark)

return_unique_features()

Return unique features

Returns:

Name Type Description
unique_features list

unique features

Source code in app/services.py
def return_unique_features(self):
    """
    Return the unique features reported by the feature extractor

    Returns
    -------
    unique_features : list
        unique features

    """
    return self.feature_extractor.return_unique_features()

generate_invitation_text(collection_data, client_name, agent_name)

Generate client invitation text

Parameters:

Name Type Description Default
collection_data list

Properties data in collection

required
client_name str

Client name

required
agent_name str

Agent name

required

Returns:

Name Type Description
greeting str

Client greeting part of email

system_introduction str

Introduction of system

collection_information str

Collection information generated by ChatGPT

closing str

Closing part of email

full_text str

Email full text

status_code int

status code of ChatGPT call

message str

message of ChatGPT call

Source code in app/services.py
def generate_invitation_text(self, collection_data, client_name, agent_name):
    """
    Generate client invitation text

    Thin wrapper around the text generator's ``generate_invitation``; its seven
    results are returned in the same order.

    Parameters
    ----------
    collection_data : list
        Properties data in collection
    client_name : str
        Client name
    agent_name : str
        Agent name

    Returns
    -------
    greeting : str
        Client greeting part of email
    system_introduction : str
        Introduction of system
    collection_information : str
        Collection information generated by ChatGPT
    closing : str
        Closing part of email
    full_text : str
        Email full text
    status_code : int
        status code of ChatGPT call
    message : str
        message of ChatGPT call
    """
    invitation = self.text_generator.generate_invitation(collection_data, client_name, agent_name)
    # Exactly seven parts are expected from the text generator
    (greeting, system_introduction, collection_information,
     closing, full_text, status_code, message) = invitation

    return (greeting, system_introduction, collection_information,
            closing, full_text, status_code, message)

generate_location_based_text(location, words_count)

Generate text and title for the given location

Parameters:

Name Type Description Default
location str

a location in the USA

required
words_count int

the approximate number of words in generated text

required

Returns:

Name Type Description
text_title str

generated text and title

status_code int

status_code of ChatGPT response

message str

message of ChatGPT response

Source code in app/services.py
def generate_location_based_text(self, location, words_count):
    """
    Generate text and title for the given location

    Thin wrapper around the text generator's ``generate_location_based_text``.

    Parameters
    ----------
    location : str
        a location in the USA
    words_count : int
        the approximate number of words in generated text

    Returns
    -------
    text_title : str
        generated text and title
    status_code : int
        status_code of ChatGPT response
    message : str
        message of ChatGPT response
    """
    generated = self.text_generator.generate_location_based_text(location, words_count)
    # Exactly three parts are expected from the text generator
    text_title, status_code, message = generated
    return text_title, status_code, message

Middlewares

LoggingMiddleware

Bases: BaseHTTPMiddleware

Basic logging middleware inherited from starlette.BaseHTTPMiddleware

Source code in app/middlewares.py
class LoggingMiddleware(BaseHTTPMiddleware):
    """
    Basic logging middleware inherited from starlette.BaseHTTPMiddleware

    Writes one log line when a request starts and one when it completes. Both
    lines carry the same generated request id (``rid``). For every response
    whose status code is not 200 the response body is logged as well.
    """

    def __init__(
            self,
            app,
            logger
    ):
        """
        Init object with app and logger

        Parameters
        ----------
        app : fastapi.FastAPI
            application object to which the middleware needs to be added
        logger : logger
            already configured logger for logging requests
        """
        super().__init__(app)
        self.logger = logger

    async def dispatch(self, request: Request, call_next):

        """
        Overriding BaseHTTPMiddleware.dispatch method to implement logging logic

        Parameters
        ----------
        request : Request
            current request
        call_next : starlette.middleware.base.RequestResponseEndpoint
            awaitable callable that receives the request and returns the response

        Returns
        -------
        streaming_response
            the response returned by ``call_next``; when its body was read for
            logging, the body iterator is replaced so the body can still be sent
        """

        guid = generate_guid()
        self.logger.info(f"rid={guid} start request path={request.url.path}")
        start_time = time()

        streaming_response = await call_next(request)
        status_code = streaming_response.status_code
        # elapsed time is converted from seconds to milliseconds
        process_time = (time() - start_time) * 1000
        formatted_process_time = '{0:.2f}'.format(process_time)

        # collect errors and log also error messages
        # (note: every status other than 200 takes this branch, not only 4xx/5xx)
        if status_code != status.HTTP_200_OK:
            # reading the iterator exhausts it, so it is rebuilt from the
            # collected chunks before the response is returned
            response_body = [section async for section in streaming_response.body_iterator]
            streaming_response.body_iterator = iterate_in_threadpool(iter(response_body))
            # join every chunk: an empty body (e.g. 204/304) yields "" instead of
            # raising IndexError, and undecodable bytes cannot break the request
            msg = b"".join(response_body).decode(errors="replace")
            self.logger.info(
                f"rid={guid} completed_in={formatted_process_time}ms"
                f" status_code={status_code}"
                f" response = {response_body},"
                f" message = {msg}")
        # for 200 OK requests logging only event
        else:
            self.logger.info(
                f"rid={guid} completed_in={formatted_process_time}ms status_code={status_code}")

        return streaming_response

__init__(app, logger)

Init object with app and logger

Parameters:

Name Type Description Default
app FastAPI

application object to which the middleware needs to be added

required
logger logger

already configured logger for logging requests

required
Source code in app/middlewares.py
def __init__(self, app, logger):
    """
    Set up the middleware with the wrapped application and a logger

    Parameters
    ----------
    app : fastapi.FastAPI
        application object that is passed on to the base class constructor
    logger : logger
        already configured logger, stored on the instance as ``self.logger``
    """
    # base class is initialised first, then the logger is attached
    super().__init__(app)
    self.logger = logger

dispatch(request, call_next) async

Overriding BaseHTTPMiddleware.dispatch method to implement logging logic

Parameters:

Name Type Description Default
request Request

current request

required
call_next RequestResponseEndpoint

awaitable callable that receives the request and returns the response

required
Source code in app/middlewares.py
async def dispatch(self, request: Request, call_next):

    """
    Overriding BaseHTTPMiddleware.dispatch method to implement logging logic

    Logs the start of the request and its completion under one generated
    request id (``rid``). For every response whose status code is not 200 the
    response body is logged as well.

    Parameters
    ----------
    request : Request
        current request
    call_next : starlette.middleware.base.RequestResponseEndpoint
        awaitable callable that receives the request and returns the response

    Returns
    -------
    streaming_response
        the response returned by ``call_next``; when its body was read for
        logging, the body iterator is replaced so the body can still be sent
    """

    guid = generate_guid()
    self.logger.info(f"rid={guid} start request path={request.url.path}")
    start_time = time()

    streaming_response = await call_next(request)
    status_code = streaming_response.status_code
    # elapsed time is converted from seconds to milliseconds
    process_time = (time() - start_time) * 1000
    formatted_process_time = '{0:.2f}'.format(process_time)

    # collect errors and log also error messages
    # (note: every status other than 200 takes this branch, not only 4xx/5xx)
    if status_code != status.HTTP_200_OK:
        # reading the iterator exhausts it, so it is rebuilt from the
        # collected chunks before the response is returned
        response_body = [section async for section in streaming_response.body_iterator]
        streaming_response.body_iterator = iterate_in_threadpool(iter(response_body))
        # join every chunk: an empty body (e.g. 204/304) yields "" instead of
        # raising IndexError, and undecodable bytes cannot break the request
        msg = b"".join(response_body).decode(errors="replace")
        self.logger.info(
            f"rid={guid} completed_in={formatted_process_time}ms"
            f" status_code={status_code}"
            f" response = {response_body},"
            f" message = {msg}")
    # for 200 OK requests logging only event
    else:
        self.logger.info(
            f"rid={guid} completed_in={formatted_process_time}ms status_code={status_code}")

    return streaming_response

ExceptionHandlerMiddleware

Bases: BaseHTTPMiddleware

A middleware to handle errors

Source code in app/middlewares.py
class ExceptionHandlerMiddleware(BaseHTTPMiddleware):
    """Middleware that turns exceptions raised further down into JSON error responses"""

    async def dispatch(self, request: Request, call_next):
        """
        Forward the request; on failure answer with a JSON description of the error

        The three domain errors listed below are reported with status 503,
        any other exception with status 500.
        """
        try:
            return await call_next(request)
        except (GoogleAdCopyGenerationError,
                FHValidationError,
                FailedToParaphraseViolations) as exc:
            http_status, details = 503, self.extract_info(exc)
        except Exception as exc:
            http_status, details = 500, self.extract_info(exc)

        return JSONResponse(status_code=http_status, content={"detail": [details]})

    @staticmethod
    def extract_info(error: BaseException):
        """Build a dict holding the class name of the error and its string form"""
        error_type = type(error).__name__
        return dict(type=error_type, msg=str(error))

dispatch(request, call_next) async

Try to process the request. If failed, return details about the exception

Source code in app/middlewares.py
async def dispatch(self, request: Request, call_next):
    """
    Forward the request; on failure answer with a JSON description of the error

    The three domain errors listed below are reported with status 503,
    any other exception with status 500.
    """
    try:
        return await call_next(request)
    except (GoogleAdCopyGenerationError,
            FHValidationError,
            FailedToParaphraseViolations) as exc:
        http_status, details = 503, self.extract_info(exc)
    except Exception as exc:
        http_status, details = 500, self.extract_info(exc)

    return JSONResponse(status_code=http_status, content={"detail": [details]})

extract_info(error) staticmethod

Extract the type and the message of an error and return as a dict

Source code in app/middlewares.py
@staticmethod
def extract_info(error: BaseException):
    """Extract the type and the message of an error and return as a dict"""
    return {"type": type(error).__name__,
            "msg": str(error)}