Source code for grammarinator.tool.file_population

# Copyright (c) 2023-2026 Renata Hodovan, Akos Kiss.
#
# Licensed under the BSD 3-Clause License
# <LICENSE.rst or https://opensource.org/licenses/BSD-3-Clause>.
# This file may not be copied, modified, or distributed except
# according to those terms.

from __future__ import annotations

import glob
import logging
import os
import random

from os.path import basename, join

from ..runtime import Annotations, Individual, Population, Rule
from .tree_codec import AnnotatedTreeCodec, PickleTreeCodec, TreeCodec

logger = logging.getLogger(__name__)


class FilePopulation(Population):
    """
    File system-based population that saves trees into files in a directory.
    The selection strategy used for mutation and recombination is purely
    random.
    """

    def __init__(self, directory: str, extension: str, codec: TreeCodec | None = None) -> None:
        """
        Create the population directory if needed and register every file in
        it that already carries the given extension.

        :param directory: Path to the directory containing the trees.
        :param extension: Extension of the files containing the trees.
        :param codec: Codec used to save trees into files
            (default: :class:`PickleTreeCodec`).
        """
        self._directory: str = directory
        self._extension: str = extension
        self._codec: TreeCodec = codec or PickleTreeCodec()
        os.makedirs(directory, exist_ok=True)
        # The directory is a literal path, not a pattern: escape it so that
        # glob metacharacters in its name ('[', ']', '*', '?') are not
        # interpreted as wildcards, which would make existing trees invisible.
        pattern = join(glob.escape(self._directory), f'*.{self._extension}')
        self._files: list[str] = glob.glob(pattern)

    def empty(self) -> bool:
        """
        Check whether the population contains no individuals.

        :return: ``True`` if no tree file is registered, ``False`` otherwise.
        """
        return not self._files

    def add_individual(self, root: Rule, path: str | None = None) -> None:
        """
        Save the tree to a new file. The name of the tree file is determined
        from the basename of the given path, or from the population class
        name if none is provided. The output file is saved with the
        extension the population was created with.

        :param root: Root of the tree to be saved.
        :param path: Optional path whose basename is used as the name of the
            tree file (without extension).
        """
        path = basename(path) if path else type(self).__name__
        fn = join(self._directory, f'{path}.{self._extension}')
        self._save(fn, root)
        self._files.append(fn)

    def select_individual(self, recipient: Individual | None = None) -> Individual:
        """
        Randomly select an individual of the population and create a
        FileIndividual instance from it.

        :param recipient: Unused.
        :return: FileIndividual instance created from a randomly selected
            population item.
        :raises ValueError: If the population is empty.
        """
        return FileIndividual(self, random.sample(self._files, k=1)[0])

    def _save(self, fn: str, root: Rule) -> None:
        """
        Encode the tree with the population's codec and write it to a file.

        :param fn: Path of the file to write (opened in binary mode).
        :param root: Root of the tree to be encoded.
        """
        with open(fn, 'wb') as f:
            if isinstance(self._codec, AnnotatedTreeCodec):
                # Annotation-aware codecs store the annotations next to the tree.
                f.write(self._codec.encode_annotated(root, Annotations(root)))
            else:
                f.write(self._codec.encode(root))

    def _load(self, fn: str) -> tuple[Rule, Annotations | None]:
        """
        Read a file and decode its content with the population's codec.

        :param fn: Path of the file to read (opened in binary mode).
        :return: The root of the decoded tree and its annotations, or
            ``None`` in place of the annotations if the codec does not
            support them.
        """
        # NOTE(review): the default codec is PickleTreeCodec; if it unpickles
        # the file content, populations must only be loaded from trusted
        # directories - confirm against the codec implementation.
        with open(fn, 'rb') as f:
            if isinstance(self._codec, AnnotatedTreeCodec):
                root, annot = self._codec.decode_annotated(f.read())
            else:
                root, annot = self._codec.decode(f.read()), None
        assert isinstance(root, Rule), root
        return root, annot
class FileIndividual(Individual):
    """
    File-backed member of a :class:`FilePopulation`. It holds both the tree
    and the annotations belonging to it, and defers reading them from disk
    (through the codec of the owning population) until the tree is first
    requested.
    """

    def __init__(self, population: FilePopulation, name: str) -> None:
        """
        :param population: The population this individual belongs to.
        :param name: Path to the encoded tree file.
        """
        super().__init__()
        self._name: str = name
        self._population: FilePopulation = population

    @property
    def root(self) -> Rule:
        """
        Get the root of the tree. An already loaded root is returned as is;
        otherwise the tree (and its annotations) are loaded from the file
        first.

        :return: The root of the tree.
        """
        # Fast path: the tree has been loaded before.
        if self._root:
            return self._root

        loaded_root, loaded_annot = self._population._load(self._name)
        self._root = loaded_root
        self._annot = loaded_annot
        if not self._root:
            logger.warning("Failed to load individual from %r", self._name)
        return self._root