Source code for fstg_toolkit.frequent.spminer

# Copyright 2025 ICube (University of Strasbourg - CNRS)
# author: Julien PONTABRY (ICube)
#
# This software is a computer program whose purpose is to provide a toolkit
# to model, process and analyze the longitudinal reorganization of brain
# connectivity data, as functional MRI for instance.
#
# This software is governed by the CeCILL-B license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/or redistribute the software under the terms of the CeCILL-B
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL-B license and that you accept its terms.

import logging
import re
from pathlib import Path
from typing import Optional

from ._docker_utils import DockerHelper, DockerNotAvailableException, DockerImage

# Module-scoped logger: records carry this module's dotted name so they can be
# filtered or configured independently, and they still propagate to the
# handlers installed on the root logger.
logger = logging.getLogger(__name__)


class SPMinerService:
    """Service wrapper for the SPMiner frequent subgraph pattern miner.

    Manages the Docker image lifecycle (build/load on demand) and provides a
    simple interface to run the miner on a directory of input graphs and
    collect the results.
    """

    def __init__(self) -> None:
        """Initialise the service by connecting to Docker.

        Raises
        ------
        RuntimeError
            If Docker is not available on the host system.
        """
        try:
            self.__docker_helper = DockerHelper()
        except DockerNotAvailableException as e:
            raise RuntimeError("Unable to initialize SPMiner service.") from e

        # Set lazily by prepare(); None means "image not loaded yet".
        self.__docker_image: Optional[DockerImage] = None
        # Matches a leading "[<completed>/<total>]" marker at the start of a line.
        self.__progress_reg = re.compile(r'^\[(?P<completed>\d+)/(?P<total>\d+)]')

    def prepare(self) -> None:
        """Build or load the SPMiner Docker image if it is not already loaded.

        The image is built from the ``spminer/`` submodule located next to this
        package. Subsequent calls are no-ops if the image is already loaded.
        """
        if self.__docker_image is None:
            # TODO use an external config file?
            tag = 'spminer:latest'
            build_path = Path(__file__).parent.parent / 'spminer'
            self.__docker_image = self.__docker_helper.load_local_image(tag, build_path)

    def run(self, input_dir: Path, output_dir: Path):
        """Run the SPMiner container on a directory of graph files.

        Mounts ``input_dir`` as read-only and ``output_dir`` as read-write
        inside the container. Progress updates are yielded as they arrive, and
        every output line is forwarded to the module logger at DEBUG level.

        Parameters
        ----------
        input_dir: Path
            Directory containing the input graph files.
        output_dir: Path
            Directory where the miner will write its output.

        Yields
        ------
        tuple[int, int]
            ``(completed, total)`` progress tuples parsed from the container
            output, where ``completed`` is the number printed in the
            ``[n/total]`` marker minus one.
        """
        self.prepare()  # makes sure docker image is set

        output = self.__docker_image.run(
            volumes={
                str(input_dir.resolve()): {'bind': '/app/data', 'mode': 'ro'},
                str(output_dir.resolve()): {'bind': '/app/results_batch', 'mode': 'rw'},
            },
            stdout=True,
            stderr=True,
        )

        for line in output:
            # Only short lines (fewer than 10 characters, trailing newline
            # included) are inspected for a progress marker.
            # NOTE(review): a marker such as "[100/100]\n" is exactly 10
            # characters long and is therefore skipped -- confirm that this
            # limit is intentional.
            if len(line) < 10:
                if match := self.__progress_reg.match(line):
                    yield int(match.group('completed')) - 1, int(match.group('total'))

            # endswith() is safe on an empty line, whereas indexing line[-1]
            # would raise IndexError.
            logger.debug(line[:-1] if line.endswith('\n') else line)