Skip to content

Generators Module

anonipy.anonymize.generators

Module containing the generators.

The generators module provides a set of generators used to generate data substitutes.

Classes:

Name Description
LLMLabelGenerator

The class representing the label generator utilizing LLMs.

MaskLabelGenerator

The class representing the label generator utilizing token masking.

NumberGenerator

The class representing the number generator.

DateGenerator

The class representing the date generator.

anonipy.anonymize.generators.LLMLabelGenerator

Bases: GeneratorInterface

The class representing the LLM label generator.

Examples:

>>> from anonipy.anonymize.generators import LLMLabelGenerator
>>> generator = LLMLabelGenerator()
>>> generator.generate(entity)

Attributes:

Name Type Description
model Transformers

The model used to generate the label substitutes.

Methods:

Name Description
generate

Generate the label based on the entity.

Source code in anonipy/anonymize/generators/llm_label_generator.py
class LLMLabelGenerator(GeneratorInterface):
    """The class representing the LLM label generator.

    Examples:
        >>> from anonipy.anonymize.generators import LLMLabelGenerator
        >>> generator = LLMLabelGenerator()
        >>> generator.generate(entity)

    Attributes:
        model (AutoModelForCausalLM): The model used to generate the label substitutes.
        tokenizer (AutoTokenizer): The tokenizer loaded for the same model name.

    Methods:
        generate(entity, add_entity_attrs, temperature, top_p):
            Generate the label based on the entity.

    """

    def __init__(
        self,
        *args,
        model_name: str = "HuggingFaceTB/SmolLM2-1.7B-Instruct",
        use_gpu: bool = False,
        **kwargs,
    ):
        """Initializes the LLM label generator.

        Args:
            model_name: The name of the model to use.
            use_gpu: Whether to use GPU or not. Falls back to CPU (with a
                warning) when CUDA is not available.

        Examples:
            >>> from anonipy.anonymize.generators import LLMLabelGenerator
            >>> generator = LLMLabelGenerator()

        """

        super().__init__(*args, **kwargs)

        if use_gpu and not torch.cuda.is_available():
            warnings.warn(
                "The use_gpu=True flag requires GPU/CUDA, but it is not available. Setting use_gpu=False."
            )
            use_gpu = False

        self.model, self.tokenizer = self._prepare_model_and_tokenizer(
            model_name, use_gpu
        )

    def generate(
        self,
        entity: Entity,
        *args,
        add_entity_attrs: str = "",
        temperature: float = 1.0,
        top_p: float = 0.95,
        **kwargs,
    ) -> str:
        """Generate the substitute for the entity based on its attributes.

        Examples:
            >>> from anonipy.anonymize.generators import LLMLabelGenerator
            >>> generator = LLMLabelGenerator()
            >>> generator.generate(entity)
            label

        Args:
            entity: The entity to generate the label from.
            add_entity_attrs: Additional entity attribute description to add to the generation.
            temperature: The temperature to use for the generation.
            top_p: The top p to use for the generation.

        Returns:
            The generated entity label substitute.

        """

        # Only prepend the extra attributes when there are any; otherwise the
        # prompt would contain a doubled space ("a random  <label> ...").
        attrs = add_entity_attrs.strip() if add_entity_attrs else ""
        description = f"{attrs} {entity.label}" if attrs else f"{entity.label}"

        message = [
            {
                "role": "system",
                "content": "You are a helpful AI assistant for generating replacements for text entities.",
            },
            {
                "role": "user",
                "content": f"What is a random {description} replacement for {entity.text}? Respond only with the replacement.",
            },
        ]
        return self._generate_response(message, temperature, top_p)

    # =================================
    # Private methods
    # =================================

    def _prepare_model_and_tokenizer(
        self, model_name: str, use_gpu: bool
    ) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
        """Prepares the model and tokenizer.

        Args:
            model_name: The name of the model to use.
            use_gpu: Whether to place the model on the GPU/CUDA device, if available.

        Returns:
            The huggingface model.
            The huggingface tokenizer.

        """

        # prepare the model; the availability check is repeated here so the
        # method is safe even when called without the guard in `__init__`
        device = torch.device(
            "cuda" if use_gpu and torch.cuda.is_available() else "cpu"
        )
        model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
        # prepare the tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            model_name, padding_side="right", use_fast=False
        )
        return model, tokenizer

    def _generate_response(
        self, message: List[dict], temperature: float, top_p: float
    ) -> str:
        """Generate the response from the LLM.

        Args:
            message: The message to generate the response from.
            temperature: The temperature to use for the generation.
            top_p: The top p to use for the generation.

        Returns:
            The generated response, with surrounding whitespace removed.

        """

        # tokenize the message and move it to the same device as the model
        input_ids = self.tokenizer.apply_chat_template(
            message, tokenize=True, return_tensors="pt", add_generation_prompt=True
        ).to(self.model.device)

        # generate the response (sampling; no gradients are needed)
        with torch.no_grad():
            output_ids = self.model.generate(
                input_ids,
                max_new_tokens=50,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
            )

        # decode only the newly generated tokens, i.e. skip the prompt tokens
        prompt_length = len(input_ids[0])
        response = self.tokenizer.decode(
            output_ids[0][prompt_length:], skip_special_tokens=True
        )
        # the response is used verbatim as a replacement in the text, so
        # leading/trailing whitespace or newlines must not leak into it
        return response.strip()

__init__(*args, model_name='HuggingFaceTB/SmolLM2-1.7B-Instruct', use_gpu=False, **kwargs)

Initializes the LLM label generator.

Parameters:

Name Type Description Default
model_name str

The name of the model to use.

'HuggingFaceTB/SmolLM2-1.7B-Instruct'
use_gpu bool

Whether to use GPU or not.

False

Examples:

>>> from anonipy.anonymize.generators import LLMLabelGenerator
>>> generator = LLMLabelGenerator()
LLMLabelGenerator()
Source code in anonipy/anonymize/generators/llm_label_generator.py
def __init__(
    self,
    *args,
    model_name: str = "HuggingFaceTB/SmolLM2-1.7B-Instruct",
    use_gpu: bool = False,
    **kwargs,
):
    """Set up the LLM label generator by loading its model and tokenizer.

    Args:
        model_name: The name of the model to use.
        use_gpu: Whether to use GPU or not. When CUDA is not available a
            warning is issued and the CPU is used instead.

    Examples:
        >>> from anonipy.anonymize.generators import LLMLabelGenerator
        >>> generator = LLMLabelGenerator()

    """

    super().__init__(*args, **kwargs)

    # downgrade to CPU when the GPU was requested but cannot be used
    if use_gpu:
        if not torch.cuda.is_available():
            warnings.warn(
                "The use_gpu=True flag requires GPU/CUDA, but it is not available. Setting use_gpu=False."
            )
            use_gpu = False

    prepared = self._prepare_model_and_tokenizer(model_name, use_gpu)
    self.model, self.tokenizer = prepared

generate(entity, *args, add_entity_attrs='', temperature=1.0, top_p=0.95, **kwargs)

Generate the substitute for the entity based on its attributes.

Examples:

>>> from anonipy.anonymize.generators import LLMLabelGenerator
>>> generator = LLMLabelGenerator()
>>> generator.generate(entity)
label

Parameters:

Name Type Description Default
entity Entity

The entity to generate the label from.

required
add_entity_attrs str

Additional entity attribute description to add to the generation.

''
temperature float

The temperature to use for the generation.

1.0
top_p float

The top p to use for the generation.

0.95

Returns:

Type Description
str

The generated entity label substitute.

Source code in anonipy/anonymize/generators/llm_label_generator.py
def generate(
    self,
    entity: Entity,
    *args,
    add_entity_attrs: str = "",
    temperature: float = 1.0,
    top_p: float = 0.95,
    **kwargs,
) -> str:
    """Generate the substitute for the entity based on its attributes.

    Examples:
        >>> from anonipy.anonymize.generators import LLMLabelGenerator
        >>> generator = LLMLabelGenerator()
        >>> generator.generate(entity)
        label

    Args:
        entity: The entity to generate the label from.
        add_entity_attrs: Additional entity attribute description to add to the generation.
        temperature: The temperature to use for the generation.
        top_p: The top p to use for the generation.

    Returns:
        The generated entity label substitute.

    """

    # Only prepend the extra attributes when there are any; otherwise the
    # prompt would contain a doubled space ("a random  <label> ...").
    attrs = add_entity_attrs.strip() if add_entity_attrs else ""
    description = f"{attrs} {entity.label}" if attrs else f"{entity.label}"

    message = [
        {
            "role": "system",
            "content": "You are a helpful AI assistant for generating replacements for text entities.",
        },
        {
            "role": "user",
            "content": f"What is a random {description} replacement for {entity.text}? Respond only with the replacement.",
        },
    ]
    return self._generate_response(message, temperature, top_p)

anonipy.anonymize.generators.MaskLabelGenerator

Bases: GeneratorInterface

The class representing the mask label generator.

Examples:

>>> from anonipy.anonymize.generators import MaskLabelGenerator
>>> generator = MaskLabelGenerator(model_name, context_window=100, use_gpu=False)
>>> generator.generate(entity)

Attributes:

Name Type Description
pipeline Pipeline

The transformers pipeline used to generate the label substitutes.

context_window int

The context window size to use to generate the label substitutes.

mask_token str

The mask token to use to replace the masked words.

Methods:

Name Description
generate

Generate the substitute for the entity based on its location in the text.

Source code in anonipy/anonymize/generators/mask_label_generator.py
class MaskLabelGenerator(GeneratorInterface):
    """The class representing the mask label generator.

    Examples:
        >>> from anonipy.anonymize.generators import MaskLabelGenerator
        >>> generator = MaskLabelGenerator(model_name, context_window=100, use_gpu=False)
        >>> generator.generate(entity, text)


    Attributes:
        pipeline (Pipeline): The transformers pipeline used to generate the label substitutes.
        context_window (int): The context window size to use to generate the label substitutes.
        mask_token (str): The mask token to use to replace the masked words.

    Methods:
        generate(entity, text):
            Generate the substitute for the entity based on its location in the text.

    """

    def __init__(
        self,
        *args,
        model_name: str = "FacebookAI/xlm-roberta-large",
        use_gpu: bool = False,
        context_window: int = 100,
        **kwargs,
    ):
        """Initializes the mask label generator.

        Examples:
            >>> from anonipy.anonymize.generators import MaskLabelGenerator
            >>> generator = MaskLabelGenerator(context_window=120, use_gpu=True)

        Args:
            model_name: The name of the masking model to use.
            use_gpu: Whether to use GPU/CUDA, if available.
            context_window: The context window size.

        """

        super().__init__(*args, **kwargs)
        self.context_window = context_window
        if use_gpu and not torch.cuda.is_available():
            warnings.warn(
                "The use_gpu=True flag requires GPU/CUDA, but it is not available. Setting use_gpu=False."
            )
            use_gpu = False

        # prepare the fill-mask pipeline and store the mask token
        model, tokenizer, device = self._prepare_model_and_tokenizer(
            model_name, use_gpu
        )
        self.mask_token = tokenizer.mask_token
        self.pipeline = pipeline(
            "fill-mask", model=model, tokenizer=tokenizer, top_k=40, device=device
        )

    def generate(self, entity: Entity, text: str, *args, **kwargs) -> str:
        """Generate the substitute for the entity using the masking model.

        Examples:
            >>> from anonipy.anonymize.generators import MaskLabelGenerator
            >>> generator = MaskLabelGenerator(context_window=120, use_gpu=True)
            >>> generator.generate(entity, text)
            label

        Args:
            entity: The entity used to generate the substitute.
            text: The original text in which the entity is located; used to get the entity's context.

        Returns:
            The generated substitute text.

        """

        masks = self._create_masks(entity)
        input_texts = self._prepare_generate_inputs(masks, text)
        suggestions = self.pipeline(input_texts)
        return self._create_substitute(entity, masks, suggestions)

    # =================================
    # Private methods
    # =================================

    def _prepare_model_and_tokenizer(
        self, model_name: str, use_gpu: bool
    ) -> Tuple[AutoModelForMaskedLM, AutoTokenizer, torch.device]:
        """Prepares the model and tokenizer.

        Args:
            model_name: The name of the model to use.
            use_gpu: Whether to use GPU/CUDA, if available.

        Returns:
            The huggingface model.
            The huggingface tokenizer.
            The device to use.

        """

        # prepare the model
        device = torch.device(
            "cuda" if use_gpu and torch.cuda.is_available() else "cpu"
        )
        model = AutoModelForMaskedLM.from_pretrained(model_name).to(device)
        # prepare the tokenizer
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        return model, tokenizer, device

    def _create_masks(self, entity: Entity) -> List[dict]:
        """Creates the masks for the provided entity.

        One mask is created per whitespace-separated chunk of the entity text;
        in each mask exactly one chunk is replaced by the mask token.

        Args:
            entity: The entity to create the masks for.

        Returns:
            The list of masks attributes, including the true text, mask text, start index, and end index within the original text.

        """

        chunks = re.split(r"\s+", entity.text)
        return [
            {
                "true_text": chunk,
                "mask_text": " ".join(
                    chunks[:idx] + [self.mask_token] + chunks[idx + 1 :]
                ),
                "start_index": entity.start_index,
                "end_index": entity.end_index,
            }
            for idx, chunk in enumerate(chunks)
        ]

    def _get_context_text(self, text: str, start_index: int, end_index: int) -> str:
        """Get the context text.

        Args:
            text: The text to get the context from.
            start_index: The start index of the span the context is built around.
            end_index: The end index of the span the context is built around.

        Returns:
            The context window text.

        """

        min_index = max(0, start_index - self.context_window)
        max_index = min(end_index + self.context_window, len(text))
        return text[min_index:max_index]

    def _prepare_generate_inputs(self, masks: List[dict], text: str) -> List[str]:
        """Prepares the generate inputs.

        Args:
            masks: The list of masks attributes.
            text: The text to prepare the generate inputs for.

        Returns:
            The list of generate inputs.

        """

        inputs = []
        for mask in masks:
            start = mask["start_index"]
            mask_text = mask["mask_text"]
            masked_text = text[:start] + mask_text + text[mask["end_index"] :]
            # The masked span usually differs in length from the original
            # entity, so the window end is computed on the masked text rather
            # than reusing the entity's original end index.
            inputs.append(
                self._get_context_text(masked_text, start, start + len(mask_text))
            )
        return inputs

    def _create_substitute(
        self, entity: Entity, masks: List[dict], suggestions: List[dict]
    ) -> str:
        """Create a substitute for the entity.

        Args:
            entity: The entity to create the substitute for.
            masks: The list of masks attributes.
            suggestions: The substitute suggestions returned by the pipeline;
                either one list of candidates per mask, or a flat list of
                candidates when there is a single mask.

        Returns:
            The created and selected substitute text, or the string "None"
            when no viable substitute could be built.

        """

        # NOTE(review): the fill-mask pipeline appears to return a flat list of
        # candidate dicts (not a list of lists) for a single input. Without
        # regrouping, zip() would pair the only mask with just the first
        # candidate -- confirm against the installed transformers version.
        if suggestions and isinstance(suggestions[0], dict):
            suggestions = [suggestions]

        substitute_chunks = []
        for mask, candidates in zip(masks, suggestions):
            if not isinstance(candidates, list):
                candidates = [candidates]
            viable_tokens = [
                candidate["token_str"]
                for candidate in candidates
                if candidate["token_str"] != mask["true_text"]
                and re.match(entity.regex, candidate["token_str"])
                and candidate["token_str"] not in STOPWORDS
            ]
            # keep only the three best viable candidates per chunk
            substitute_chunks.append(viable_tokens[:3])

        # dict.fromkeys drops repeated tokens while keeping the chunk order
        # (a set would shuffle the words of a multi-word substitute)
        combinations = [
            " ".join(dict.fromkeys(combination))
            for combination in itertools.product(*substitute_chunks)
        ]
        return random.choice(combinations) if combinations else "None"

__init__(*args, model_name='FacebookAI/xlm-roberta-large', use_gpu=False, context_window=100, **kwargs)

Initializes the mask label generator.

Examples:

>>> from anonipy.anonymize.generators import MaskLabelGenerator
>>> generator = MaskLabelGenerator(context_window=120, use_gpu=True)

Parameters:

Name Type Description Default
model_name str

The name of the masking model to use.

'FacebookAI/xlm-roberta-large'
use_gpu bool

Whether to use GPU/CUDA, if available.

False
context_window int

The context window size.

100
Source code in anonipy/anonymize/generators/mask_label_generator.py
def __init__(
    self,
    *args,
    model_name: str = "FacebookAI/xlm-roberta-large",
    use_gpu: bool = False,
    context_window: int = 100,
    **kwargs,
):
    """Set up the mask label generator and its fill-mask pipeline.

    Examples:
        >>> from anonipy.anonymize.generators import MaskLabelGenerator
        >>> generator = MaskLabelGenerator(context_window=120, use_gpu=True)

    Args:
        model_name: The name of the masking model to use.
        use_gpu: Whether to use GPU/CUDA, if available. When it is not
            available a warning is issued and the CPU is used instead.
        context_window: The context window size.

    """

    super().__init__(*args, **kwargs)
    self.context_window = context_window

    # downgrade to CPU when the GPU was requested but cannot be used
    if use_gpu:
        if not torch.cuda.is_available():
            warnings.warn(
                "The use_gpu=True flag requires GPU/CUDA, but it is not available. Setting use_gpu=False."
            )
            use_gpu = False

    # load the model parts, remember the mask token, then build the pipeline
    prepared = self._prepare_model_and_tokenizer(model_name, use_gpu)
    mlm_model, mlm_tokenizer, target_device = prepared
    self.mask_token = mlm_tokenizer.mask_token
    pipeline_options = {
        "model": mlm_model,
        "tokenizer": mlm_tokenizer,
        "top_k": 40,
        "device": target_device,
    }
    self.pipeline = pipeline("fill-mask", **pipeline_options)

generate(entity, text, *args, **kwargs)

Generate the substitute for the entity using the masking model.

Examples:

>>> from anonipy.anonymize.generators import MaskLabelGenerator
>>> generator = MaskLabelGenerator(context_window=120, use_gpu=True)
>>> generator.generate(entity, text)
label

Parameters:

Name Type Description Default
entity Entity

The entity used to generate the substitute.

required
text str

The original text in which the entity is located; used to get the entity's context.

required

Returns:

Type Description
str

The generated substitute text.

Source code in anonipy/anonymize/generators/mask_label_generator.py
def generate(self, entity: Entity, text: str, *args, **kwargs) -> str:
    """Produce a substitute for the entity with the help of the masking model.

    Examples:
        >>> from anonipy.anonymize.generators import MaskLabelGenerator
        >>> generator = MaskLabelGenerator(context_window=120, use_gpu=True)
        >>> generator.generate(entity, text)
        label

    Args:
        entity: The entity used to generate the substitute.
        text: The original text in which the entity is located; used to get the entity's context.

    Returns:
        The generated substitute text.

    """

    # masks -> masked context inputs -> pipeline suggestions -> substitute
    entity_masks = self._create_masks(entity)
    return self._create_substitute(
        entity,
        entity_masks,
        self.pipeline(self._prepare_generate_inputs(entity_masks, text)),
    )

anonipy.anonymize.generators.NumberGenerator

Bases: GeneratorInterface

The class representing the number generator.

Examples:

>>> from anonipy.anonymize.generators import NumberGenerator
>>> generator = NumberGenerator()
>>> generator.generate(entity)

Methods:

Name Description
generate

Generates a substitute for the numeric entity.

Source code in anonipy/anonymize/generators/number_generator.py
class NumberGenerator(GeneratorInterface):
    """Generator that replaces the digits of a numeric entity with random ones.

    Examples:
        >>> from anonipy.anonymize.generators import NumberGenerator
        >>> generator = NumberGenerator()
        >>> generator.generate(entity)

    Methods:
        generate(self, entity):
            Generates a substitute for the numeric entity.

    """

    def __init__(self, *args, **kwargs):
        """Create the number generator.

        All arguments are forwarded unchanged to the parent initializer.

        Examples:
            >>> from anonipy.anonymize.generators import NumberGenerator
            >>> generator = NumberGenerator()

        """

        super().__init__(*args, **kwargs)

    def generate(self, entity: Entity, *args, **kwargs) -> str:
        """Build a substitute in which every digit of the entity text is randomized.

        Non-digit characters (separators, signs, spaces, ...) are kept as-is,
        so the substitute has the same layout as the original text.

        Examples:
            >>> from anonipy.anonymize.generators import NumberGenerator
            >>> generator = NumberGenerator()
            >>> generator.generate(entity)
            "1234567890"

        Args:
            entity: The numeric entity to generate the numeric substitute.

        Returns:
            The generated numeric substitute.

        Raises:
            ValueError: If the entity type is not `integer`, `float`, `phone_number` or `custom`.

        """

        # reject unsupported types first; `custom` is allowed but flagged
        if entity.type not in ("custom", "integer", "float", "phone_number"):
            raise ValueError(
                "The entity type must be `integer`, `float`, `phone_number` or `custom` to generate numbers."
            )
        if entity.type == "custom":
            warnings.warn(
                "The entity type is `custom`. Make sure the generator is returning appropriate values."
            )

        return "".join(
            str(random.randint(0, 9)) if char.isdigit() else char
            for char in entity.text
        )

__init__(*args, **kwargs)

Initializes the number generator.

Examples:

>>> from anonipy.anonymize.generators import NumberGenerator
>>> generator = NumberGenerator()
Source code in anonipy/anonymize/generators/number_generator.py
def __init__(self, *args, **kwargs):
    """Initializes the number generator.

    The generator keeps no configuration of its own; all positional and
    keyword arguments are forwarded unchanged to the parent initializer.

    Examples:
        >>> from anonipy.anonymize.generators import NumberGenerator
        >>> generator = NumberGenerator()

    """

    super().__init__(*args, **kwargs)

generate(entity, *args, **kwargs)

Generates the substitute for the numeric entity.

Examples:

>>> from anonipy.anonymize.generators import NumberGenerator
>>> generator = NumberGenerator()
>>> generator.generate(entity)
"1234567890"

Parameters:

Name Type Description Default
entity Entity

The numeric entity to generate the numeric substitute.

required

Returns:

Type Description
str

The generated numeric substitute.

Raises:

Type Description
ValueError

If the entity type is not integer, float, phone_number or custom.

Source code in anonipy/anonymize/generators/number_generator.py
def generate(self, entity: Entity, *args, **kwargs) -> str:
    """Build a substitute in which every digit of the entity text is randomized.

    Non-digit characters (separators, signs, spaces, ...) are kept as-is,
    so the substitute has the same layout as the original text.

    Examples:
        >>> from anonipy.anonymize.generators import NumberGenerator
        >>> generator = NumberGenerator()
        >>> generator.generate(entity)
        "1234567890"

    Args:
        entity: The numeric entity to generate the numeric substitute.

    Returns:
        The generated numeric substitute.

    Raises:
        ValueError: If the entity type is not `integer`, `float`, `phone_number` or `custom`.

    """

    # reject unsupported types first; `custom` is allowed but flagged
    if entity.type not in ("custom", "integer", "float", "phone_number"):
        raise ValueError(
            "The entity type must be `integer`, `float`, `phone_number` or `custom` to generate numbers."
        )
    if entity.type == "custom":
        warnings.warn(
            "The entity type is `custom`. Make sure the generator is returning appropriate values."
        )

    return "".join(
        str(random.randint(0, 9)) if char.isdigit() else char
        for char in entity.text
    )

anonipy.anonymize.generators.DateGenerator

Bases: GeneratorInterface

The class representing the date generator.

Examples:

>>> from anonipy.anonymize.generators import DateGenerator
>>> generator = DateGenerator(lang="de")
>>> generator.generate(entity)

Attributes:

Name Type Description
lang (str, LANGUAGES)

The language of the text.

date_format str

The date format in which the date should be generated.

day_sigma int

The range of the random date in days.

Methods:

Name Description
generate

Generate the date substitute based on the input parameters.

Source code in anonipy/anonymize/generators/date_generator.py
class DateGenerator(GeneratorInterface):
    """The class representing the date generator.

    Examples:
        >>> from anonipy.anonymize.generators import DateGenerator
        >>> generator = DateGenerator(lang="de")
        >>> generator.generate(entity)

    Attributes:
        lang (str, LANGUAGES): The language of the text.
        date_format (str): The date format in which the date should be generated.
        day_sigma (int): The range of the random date in days.

    Methods:
        generate(entity, sub_variant):
            Generate the date substitute based on the input parameters.

    """

    def __init__(
        self,
        *args,
        lang: Union[str, LANGUAGES] = "en",
        date_format: str = "auto",
        day_sigma: int = 30,
        **kwargs,
    ):
        """Initializes the date generator.

        Examples:
            >>> from anonipy.anonymize.generators import DateGenerator
            >>> generator = DateGenerator()

        Args:
            lang: The language of the text.
            date_format: The date format in which the date should be generated. For more on date formats, [see here](https://www.contensis.com/help-and-docs/guides/querying-your-content/zenql-search/date-formats).
            day_sigma: The range of the random date in days.

        Raises:
            ValueError: If `lang` is neither a supported language string nor a `LANGUAGES` value.

        """

        super().__init__(*args, **kwargs)
        self.date_format = date_format
        self.day_sigma = day_sigma

        if isinstance(lang, str) and lang in LANGUAGES.supported_languages():
            self.lang = lang
        elif isinstance(lang, LANGUAGES):
            # NOTE(review): assumes a LANGUAGES value is indexable with the
            # language code as its first item -- confirm against LANGUAGES.
            self.lang = lang[0]
        else:
            # ValueError is a subclass of Exception, so callers that catch
            # Exception keep working while the error becomes more specific.
            raise ValueError(f"Unknown lang value: {lang}")

    def generate(
        self,
        entity: Entity,
        *args,
        sub_variant: DATE_TRANSFORM_VARIANTS = DATE_TRANSFORM_VARIANTS.RANDOM,
        **kwargs,
    ) -> str:
        """Generate the entity substitute based on the input parameters.

        Args:
            entity: The entity to generate the date substitute from.
            sub_variant: The substitute function variant to use.

        Returns:
            The generated date substitute.

        Raises:
            ValueError: If the entity type is not `date` or `custom`, if
                `sub_variant` is not a valid variant, or if the entity text
                cannot be parsed as a date.

        """

        if entity.type in ["custom"]:
            warnings.warn(
                "The entity type is `custom`. Make sure the generator is returning appropriate values."
            )
        elif entity.type not in ["date"]:
            raise ValueError("The entity type must be `date` to generate dates.")

        if not DATE_TRANSFORM_VARIANTS.is_valid(sub_variant):
            raise ValueError(
                f"The sub_variant must be one of {', '.join(DATE_TRANSFORM_VARIANTS.values())} to generate dates."
            )

        # detect the date format, or parse with the explicitly configured one
        if self.date_format == "auto":
            entity_date, date_format = detect_datetime_format(entity.text, self.lang)
        else:
            entity_date = dateparser.parse(entity.text, languages=[self.lang])
            date_format = self.date_format

        # validate the input values
        if entity_date is None:
            raise ValueError(f"Entity `{entity.text}` is not a valid date.")
        if date_format is None or isinstance(date_format, ValueError):
            raise ValueError(f"Entity `{entity.text}` is not a valid date.")

        # generate the date substitute and render it in the detected/configured format
        generate_date = DATE_VARIANTS_MAPPING[sub_variant](entity_date, self.day_sigma)
        return format_datetime(generate_date, format=date_format, locale=self.lang)

__init__(*args, lang='en', date_format='auto', day_sigma=30, **kwargs)

Initializes the date generator.

Examples:

>>> from anonipy.anonymize.generators import DateGenerator
>>> generator = DateGenerator()

Parameters:

Name Type Description Default
lang Union[str, LANGUAGES]

The language of the text.

'en'
date_format str

The date format in which the date should be generated. For more on date formats, see here.

'auto'
day_sigma int

The range of the random date in days.

30
Source code in anonipy/anonymize/generators/date_generator.py
def __init__(
    self,
    *args,
    lang: Union[str, LANGUAGES] = "en",
    date_format: str = "auto",
    day_sigma: int = 30,
    **kwargs,
):
    """Initializes the date generator.

    Examples:
        >>> from anonipy.anonymize.generators import DateGenerator
        >>> generator = DateGenerator()

    Args:
        lang: The language of the text.
        date_format: The date format in which the date should be generated. For more on date formats, [see here](https://www.contensis.com/help-and-docs/guides/querying-your-content/zenql-search/date-formats).
        day_sigma: The range of the random date in days.

    Raises:
        ValueError: If `lang` is neither a supported language string nor a `LANGUAGES` value.

    """

    super().__init__(*args, **kwargs)
    self.date_format = date_format
    self.day_sigma = day_sigma

    if isinstance(lang, str) and lang in LANGUAGES.supported_languages():
        self.lang = lang
    elif isinstance(lang, LANGUAGES):
        # NOTE(review): assumes a LANGUAGES value is indexable with the
        # language code as its first item -- confirm against LANGUAGES.
        self.lang = lang[0]
    else:
        # ValueError is a subclass of Exception, so callers that catch
        # Exception keep working while the error becomes more specific.
        raise ValueError(f"Unknown lang value: {lang}")

generate(entity, *args, sub_variant=DATE_TRANSFORM_VARIANTS.RANDOM, **kwargs)

Generate the entity substitute based on the input parameters.

Parameters:

Name Type Description Default
entity Entity

The entity to generate the date substitute from.

required
sub_variant DATE_TRANSFORM_VARIANTS

The substitute function variant to use.

RANDOM

Returns:

Type Description
str

The generated date substitute.

Raises:

Type Description
ValueError

If the entity type is not date or custom.

Source code in anonipy/anonymize/generators/date_generator.py
def generate(
    self,
    entity: Entity,
    *args,
    sub_variant: DATE_TRANSFORM_VARIANTS = DATE_TRANSFORM_VARIANTS.RANDOM,
    **kwargs,
) -> str:
    """Create a date substitute for the entity using the chosen variant.

    Args:
        entity: The entity to generate the date substitute from.
        sub_variant: The substitute function variant to use.

    Returns:
        The generated date substitute.

    Raises:
        ValueError: If the entity type is not `date` or `custom`.

    """

    # `custom` entities are accepted with a warning; anything but `date` is rejected
    if entity.type == "custom":
        warnings.warn(
            "The entity type is `custom`. Make sure the generator is returning appropriate values."
        )
    elif entity.type != "date":
        raise ValueError("The entity type must be `date` to generate dates.")

    if not DATE_TRANSFORM_VARIANTS.is_valid(sub_variant):
        allowed_variants = ", ".join(DATE_TRANSFORM_VARIANTS.values())
        raise ValueError(
            f"The sub_variant must be one of {allowed_variants} to generate dates."
        )

    # either auto-detect the format or parse with the configured one
    if self.date_format == "auto":
        parsed_date, output_format = detect_datetime_format(entity.text, self.lang)
    else:
        parsed_date = dateparser.parse(entity.text, languages=[self.lang])
        output_format = self.date_format

    # both a missing date and an unusable format are reported the same way
    date_missing = parsed_date is None
    format_unusable = output_format is None or isinstance(output_format, ValueError)
    if date_missing or format_unusable:
        raise ValueError(f"Entity `{entity.text}` is not a valid date.")

    # apply the selected transformation and render the result
    transform = DATE_VARIANTS_MAPPING[sub_variant]
    substitute_date = transform(parsed_date, self.day_sigma)
    return format_datetime(substitute_date, format=output_format, locale=self.lang)

anonipy.anonymize.generators.GeneratorInterface

The class representing the generator interface.

All generators should inherit from this class.

Methods:

Name Description
generate

Generate a substitute for the entity.

Source code in anonipy/anonymize/generators/interface.py
class GeneratorInterface:
    """The class representing the generator interface.

    All generators should inherit from this class.

    Methods:
        generate(entity):
            Generate a substitute for the entity.

    """

    def __init__(self, *args, **kwargs):
        """Initializes the generator.

        The base implementation accepts any arguments and ignores them.

        """
        pass

    def generate(self, entity: Entity, *args, **kwargs):
        """Generate a substitute for the entity.

        The base implementation does nothing and returns `None`; subclasses
        are expected to override it.

        Args:
            entity: The entity to generate the substitute for.

        """
        pass