From dc3e913f7b5b86a281664b02f4bb489501a9021a Mon Sep 17 00:00:00 2001 From: Guilherme Schardong Date: Thu, 16 Apr 2026 14:01:31 +0100 Subject: [PATCH] Added docstring, and `load_metadata` function. --- src/facebias/__init__.py | 63 ++++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/src/facebias/__init__.py b/src/facebias/__init__.py index 243d57b..2a94dfd 100644 --- a/src/facebias/__init__.py +++ b/src/facebias/__init__.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +import csv import logging from collections import OrderedDict from dataclasses import dataclass @@ -11,7 +12,6 @@ import numpy as np logging.basicConfig( level=logging.DEBUG, # Set the minimum logging level - # format="%(asctime)s - [%(levelname)s] - %(filename)s:%(lineno)s - %(message)s", ) logger = logging.getLogger(__name__) @@ -25,11 +25,50 @@ class FaceBox: y2: int +def load_metadata(p: Path, key_id="image", key_proc_fn=None) -> dict[str, dict[str, str]]: + lines = [] + with open(p, newline="") as csvfile: + dialect = csv.Sniffer().sniff(csvfile.read(1024)) + csvfile.seek(0) + reader = csv.DictReader(csvfile, dialect=dialect) + for row in reader: + lines.append(row) + + metadata = dict() + for l in lines: + new_key_id = key_proc_fn(l[key_id]) if key_proc_fn is not None else l[key_id] + metadata[new_key_id] = dict((k, v) for k, v in l.items() if k != key_id) + + return metadata + + def load_dataset( root: Path, meta_path: Optional[Path] = None, imname_proc_fn: Optional[Callable]=None -) -> tuple[dict[Path, np.ndarray], dict[str, dict[str, Any]]]: +) -> tuple[dict[Path, np.ndarray], Optional[dict[str, dict[str, Any]]]]: + """ + if `meta_path` is `None`, we won't attempt to read it. + + Parameters + ---------- + root: Path + Root path to the images. We assume that the images are positioned in a + flat directory under `root`. + + meta_path: Optional[Path] + Path to the metadata file on the dataset. Default is `None`. + + imname_proc_fn: Optional[Callable] + Function to apply to the image filenames when building the output + dictionary. Use it to uniformize it with the metadata file. Default + is `None`. + + Returns + ------- + data: dict[Path, np.ndarray] + metadata, Optional[dict[str, dict[str, Any]]] + """ metadata = dict() paths = set([p for p in root.iterdir() if not p.is_dir()]) if meta_path is not None and meta_path in paths: @@ -43,16 +82,16 @@ def load_dataset( try: im = cv2.cvtColor(cv2.imread(p), cv2.COLOR_BGR2RGB) except cv2.error: - logger.info(f'File "{p}" is not an image.') - if meta_path is None: - logger.info(f'Trying to read "{p}" as metadata.') - if p.suffix == ".csv": - metadata = load_metadata(p) - logger.info("Metadata read successfully.") - else: - logger.error(f'Failed to read "{p}" as metadata. Skipping.') - elif not metadata: - logger.critical("Logic error: Metadata should have been read already.") + logger.info(f'File "{p}" is not an image. Skipping.') + # if meta_path is None: + # logger.info(f'Trying to read "{p}" as metadata.') + # if p.suffix == ".csv": + # metadata = load_metadata(p) + # logger.info("Metadata read successfully.") + # else: + # logger.error(f'Failed to read "{p}" as metadata. Skipping.') + # elif not metadata: + # logger.critical("Logic error: Metadata should have been read already.") else: proc_imname = imname_proc_fn(p.name) if imname_proc_fn is not None else p.name ims[proc_imname] = im