# Copyright 2025 ICube (University of Strasbourg - CNRS)
# author: Julien PONTABRY (ICube)
#
# This software is a computer program whose purpose is to provide a toolkit
# to model, process and analyze the longitudinal reorganization of brain
# connectivity data, as functional MRI for instance.
#
# This software is governed by the CeCILL-B license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/or redistribute the software under the terms of the CeCILL-B
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL-B license and that you accept its terms.
import secrets
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Optional, Generator, Type
from .utils import SQLiteConnected
[docs]
class AbstractClassNotMeantToBeUsedDirectly(NotImplementedError):
    """Error raised by the default body of an abstract method.

    The exception takes no argument; its message is always the same fixed text.
    """

    def __init__(self) -> None:
        message = "Abstract class not meant to be used directly"
        super().__init__(message)
[docs]
class DataFilesDB(ABC):
    """Abstract base class for managing a database of data files, each associated with a unique token.

    Concrete subclasses provide the storage by implementing
    ``_add_data_file_to_db``, ``get``, ``list`` and ``__len__``.

    Parameters
    ----------
    token_nb_bytes : int, optional
        Number of random bytes to use when generating tokens for data files (default is 3).
    debug : bool, optional
        If True, every generated token is the fixed string ``"debug-mode-token"``
        instead of a random one, and no collision check is performed (default is False).

    Methods
    -------
    add(file_path: Path) -> str
        Adds a data file to the database and returns its unique token.
    get(token: str) -> Optional[pathlib.Path]
        Retrieves the file path associated with the given token.
    list() -> Generator[tuple[str, pathlib.Path], None, None]
        Lists all token-file path pairs in the database.
    """

    # Maximum number of tokens drawn by add() while looking for an unused one.
    _MAX_TOKEN_ATTEMPTS = 100

    def __init__(self, token_nb_bytes: int = 3, debug: bool = False):
        self.__token_nb_bytes = token_nb_bytes
        self.__debug = debug
        if debug:
            # Deterministic token, so that outputs are reproducible
            self.__generate_token_impl = lambda: "debug-mode-token"
        else:
            self.__generate_token_impl = lambda: secrets.token_urlsafe(nbytes=self.__token_nb_bytes)

    @abstractmethod
    def _add_data_file_to_db(self, token: str, file_path: Path) -> None:
        """Stores the association between ``token`` and ``file_path`` (backend specific)."""
        raise AbstractClassNotMeantToBeUsedDirectly()

    def __generate_token(self) -> str:
        return self.__generate_token_impl()

    def add(self, file_path: Path) -> str:
        """Adds a data file to the database and returns its unique token.

        Outside debug mode, a freshly drawn token that is already registered
        (i.e. ``get(token)`` is not None) is discarded and a new one is drawn,
        so that an existing entry is never silently overwritten.

        Parameters
        ----------
        file_path : Path
            The path to the data file to be added.

        Returns
        -------
        str
            A unique token associated with the added data file.

        Raises
        ------
        RuntimeError
            If no unused token could be drawn after ``_MAX_TOKEN_ATTEMPTS`` attempts.
        """
        for _ in range(self._MAX_TOKEN_ATTEMPTS):
            token = self.__generate_token()
            # The debug token is constant: retrying would be pointless, so keep
            # the historical behaviour (the entry is handed to the backend as is).
            if self.__debug or self.get(token) is None:
                break
        else:
            raise RuntimeError(
                f"Unable to generate an unused token after {self._MAX_TOKEN_ATTEMPTS} attempts; "
                f"consider increasing token_nb_bytes (currently {self.__token_nb_bytes})"
            )

        self._add_data_file_to_db(token, file_path)
        return token

    @abstractmethod
    def get(self, token: str) -> Optional[Path]:
        """Retrieves the file path associated with the given token.

        Parameters
        ----------
        token : str
            The unique token associated with the data file.

        Returns
        -------
        pathlib.Path or None
            The path to the data file if found, otherwise None.
        """
        raise AbstractClassNotMeantToBeUsedDirectly()

    @abstractmethod
    def list(self) -> Generator[tuple[str, Path], None, None]:
        """Lists all token-file path pairs in the database.

        Returns
        -------
        Generator of tuple[str, pathlib.Path]
            A generator yielding tuples of tokens and their associated file paths.
        """
        raise AbstractClassNotMeantToBeUsedDirectly()

    def __iter__(self):
        # Iterating over the database is the same as iterating over list()
        return iter(self.list())

    @abstractmethod
    def __len__(self) -> int:
        raise AbstractClassNotMeantToBeUsedDirectly()

    def __str__(self) -> str:
        return f"token_nb_bytes={self.__token_nb_bytes}"
[docs]
class MemoryDataFilesDB(DataFilesDB):
    """Dictionary-backed, non-persistent implementation of DataFilesDB.

    Token/file path pairs live in a plain Python dictionary held by the
    instance, so nothing survives once the object is gone. It is suitable for
    use cases where persistence is not required and the number of files is
    relatively small.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to ``DataFilesDB.__init__``
        (e.g. ``token_nb_bytes``, ``debug``).

    Methods
    -------
    add(file_path: Path) -> str
        Adds a data file to the database and returns its unique token.
    get(token: str) -> Optional[Path]
        Retrieves the file path associated with the given token.
    list() -> Generator[tuple[str, Path], None, None]
        Lists all token-file path pairs in the database.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The whole database: token -> path of the data file
        self.__db: dict[str, Path] = {}

    def _add_data_file_to_db(self, token: str, file_path: Path) -> None:
        """Registers ``file_path`` under ``token``, replacing any previous entry."""
        self.__db[token] = file_path

    def get(self, token: str) -> Optional[Path]:
        """Returns the path registered under ``token``, or None if there is none."""
        return self.__db.get(token)

    def list(self) -> Generator[tuple[str, Path], None, None]:
        """Yields every (token, path) pair, in the dictionary's iteration order."""
        yield from self.__db.items()

    def __len__(self) -> int:
        return len(self.__db)

    def __str__(self) -> str:
        base_description = super().__str__()
        return f"MemoryDataFilesDB({base_description})"

    def __repr__(self) -> str:
        return str(self)
[docs]
class SQLiteDataFilesDB(DataFilesDB, SQLiteConnected):
    """SQLite-backed implementation of the DataFilesDB.

    Token -> file path mappings are kept in a ``files`` table (columns
    ``token`` and ``path``) that is created on construction if it does not
    exist yet. Every operation obtains its connection from
    ``self._get_connection()`` (presumably provided by SQLiteConnected).

    Parameters
    ----------
    db_path : pathlib.Path | str
        Path to the sqlite database file; handed to ``SQLiteConnected.__init__``.

    Other positional and keyword args are forwarded to the parent DataFilesDB
    (e.g. token_nb_bytes, debug).
    """

    def __init__(self, db_path: Path, *args, **kwargs):
        # Both parents are initialised explicitly since they take different arguments
        DataFilesDB.__init__(self, *args, **kwargs)
        SQLiteConnected.__init__(self, db_path)
        self.__init_db()

    def __init_db(self):
        """Creates the ``files`` table when it is missing."""
        with self._get_connection() as connection:
            connection.execute('''
                CREATE TABLE IF NOT EXISTS files (
                    token TEXT NOT NULL PRIMARY KEY,
                    path TEXT NOT NULL
                )
            ''')

    def _add_data_file_to_db(self, token: str, file_path: Path) -> None:
        """Stores ``file_path`` (as text) under ``token``, replacing an existing row."""
        parameters = (token, str(file_path))
        with self._get_connection() as connection:
            connection.execute('INSERT OR REPLACE INTO files (token, path) VALUES (?, ?)',
                               parameters)

    def get(self, token: str) -> Optional[Path]:
        """Returns the path stored under ``token``, or None when no row matches."""
        with self._get_connection() as connection:
            cursor = connection.execute('SELECT path FROM files WHERE token = ?', (token,))
            record = cursor.fetchone()

        return Path(record[0]) if record is not None else None

    def list(self) -> Generator[tuple[str, Path], None, None]:
        """Yields every (token, path) pair, ordered by SQLite rowid."""
        with self._get_connection() as connection:
            cursor = connection.execute("SELECT token, path FROM files ORDER BY rowid")
            records = cursor.fetchall()

        # All rows are fetched up front; paths are rebuilt lazily while yielding
        yield from ((token, Path(raw_path)) for token, raw_path in records)

    def __len__(self) -> int:
        with self._get_connection() as connection:
            record = connection.execute("SELECT COUNT(*) FROM files").fetchone()

        if record is None:
            return 0
        return int(record[0])
# Module-level singleton holding the data files database.
# It stays None until get_data_file_db() creates the instance on its first call.
singleton_data_files_db: Optional[DataFilesDB] = None
[docs]
def get_data_file_db(requested_type: Optional[Type[DataFilesDB]] = None, **kwargs) -> DataFilesDB:
    """Returns the singleton instance of the data files database.

    On the first call the instance is built from ``requested_type`` (or
    SQLiteDataFilesDB when no type is given) and remembered at module level.
    Later calls return that same instance.

    Parameters
    ----------
    requested_type : type[DataFilesDB] or None, optional
        The class type of the data files database to instantiate. If None, defaults to
        SQLiteDataFilesDB for database creation.
    **kwargs
        Keyword arguments passed to the database constructor. They are only
        used when the instance is created, i.e. ignored once it exists.

    Returns
    -------
    DataFilesDB
        The singleton instance of the data files database.

    Raises
    ------
    RuntimeError
        If a database instance already exists with a different type than requested_type.
    """
    global singleton_data_files_db

    if singleton_data_files_db is not None:
        # An instance already exists: it is only returned if it is compatible
        # with the requested type (no type requested means anything is fine).
        if requested_type is None or isinstance(singleton_data_files_db, requested_type):
            return singleton_data_files_db

        raise RuntimeError(f"Unable to get a data file db of type {requested_type}! "
                           f"A DB already exists with type {type(singleton_data_files_db)}")

    # First call: create the instance and remember it
    db_type = requested_type or SQLiteDataFilesDB
    singleton_data_files_db = db_type(**kwargs)
    return singleton_data_files_db