Added docstring, and load_metadata function.

This commit is contained in:
2026-04-16 14:01:31 +01:00
parent adadf4c7fd
commit dc3e913f7b

View File

@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
import csv
import logging
from collections import OrderedDict
from dataclasses import dataclass
@@ -11,7 +12,6 @@ import numpy as np
# Configure the root logger when this module is imported. DEBUG is the minimum
# level that will be emitted. A more detailed record format is kept here for
# reference but is currently disabled:
# format="%(asctime)s - [%(levelname)s] - %(filename)s:%(lineno)s - %(message)s",
logging.basicConfig(level=logging.DEBUG)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
@@ -25,11 +25,50 @@ class FaceBox:
y2: int
def load_metadata(p: Path, key_id="image", key_proc_fn=None) -> dict[str, dict[str, str]]:
    """
    Read a delimited text file into a dictionary keyed by one of its columns.

    The delimiter is guessed from the first 1024 characters of the file with
    `csv.Sniffer`. If the sniffer cannot determine a dialect (e.g. the file is
    empty), the default `csv.excel` (comma-separated) dialect is used instead
    of failing.

    Parameters
    ----------
    p: Path
        Path to the metadata file. The first row is used as the header.
    key_id: str
        Name of the column whose value identifies each row. Default is
        `"image"`.
    key_proc_fn: Optional[Callable]
        Function applied to the value of the `key_id` column before it is used
        as a key of the output dictionary. Default is `None` (value used
        as is).

    Returns
    -------
    metadata: dict[str, dict[str, str]]
        One entry per row, mapping the (processed) `key_id` value to the
        remaining columns of that row. If several rows share a key, the last
        one wins.

    Raises
    ------
    KeyError
        If the file has data rows but no column named `key_id`.
    """
    with open(p, newline="") as csvfile:
        try:
            dialect = csv.Sniffer().sniff(csvfile.read(1024))
        except csv.Error:
            # Sniffing fails on empty or otherwise undeterminable samples;
            # fall back to plain comma-separated parsing instead of crashing.
            dialect = csv.excel
        # The sniffer consumed the start of the file; rewind before parsing.
        csvfile.seek(0)
        rows = list(csv.DictReader(csvfile, dialect=dialect))

    metadata = {}
    for row in rows:
        raw_key = row[key_id]
        key = raw_key if key_proc_fn is None else key_proc_fn(raw_key)
        metadata[key] = {
            column: value for column, value in row.items() if column != key_id
        }
    return metadata
def load_dataset(
root: Path,
meta_path: Optional[Path] = None,
imname_proc_fn: Optional[Callable]=None
) -> tuple[dict[Path, np.ndarray], dict[str, dict[str, Any]]]:
) -> tuple[dict[Path, np.ndarray], Optional[dict[str, dict[str, Any]]]]:
"""
If `meta_path` is `None`, no metadata file is read.
Parameters
----------
root: Path
Root path to the images. We assume that the images are positioned in a
flat directory under `root`.
meta_path: Optional[Path]
Path to the metadata file on the dataset. Default is `None`.
imname_proc_fn: Optional[Callable]
Function to apply to the image filenames when building the output
dictionary. Use it to uniformize it with the metadata file. Default
is `None`.
Returns
-------
data: dict[Path, np.ndarray]
metadata: Optional[dict[str, dict[str, Any]]]
"""
metadata = dict()
paths = set([p for p in root.iterdir() if not p.is_dir()])
if meta_path is not None and meta_path in paths:
@@ -43,16 +82,16 @@ def load_dataset(
try:
im = cv2.cvtColor(cv2.imread(p), cv2.COLOR_BGR2RGB)
except cv2.error:
logger.info(f'File "{p}" is not an image.')
if meta_path is None:
logger.info(f'Trying to read "{p}" as metadata.')
if p.suffix == ".csv":
metadata = load_metadata(p)
logger.info("Metadata read successfully.")
else:
logger.error(f'Failed to read "{p}" as metadata. Skipping.')
elif not metadata:
logger.critical("Logic error: Metadata should have been read already.")
logger.info(f'File "{p}" is not an image. Skipping.')
# if meta_path is None:
# logger.info(f'Trying to read "{p}" as metadata.')
# if p.suffix == ".csv":
# metadata = load_metadata(p)
# logger.info("Metadata read successfully.")
# else:
# logger.error(f'Failed to read "{p}" as metadata. Skipping.')
# elif not metadata:
# logger.critical("Logic error: Metadata should have been read already.")
else:
proc_imname = imname_proc_fn(p.name) if imname_proc_fn is not None else p.name
ims[proc_imname] = im