Added a docstring and the load_metadata function.
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import csv
|
||||
import logging
|
||||
from collections import OrderedDict
|
||||
from dataclasses import dataclass
|
||||
@@ -11,7 +12,6 @@ import numpy as np
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG, # Set the minimum logging level
|
||||
# format="%(asctime)s - [%(levelname)s] - %(filename)s:%(lineno)s - %(message)s",
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -25,11 +25,50 @@ class FaceBox:
|
||||
y2: int
|
||||
|
||||
|
||||
def load_metadata(p: Path, key_id="image", key_proc_fn=None) -> dict[str, dict[str, str]]:
    """
    Read a delimited text file into a dictionary indexed by one of its columns.

    The file's dialect (delimiter, quoting) is guessed with `csv.Sniffer`
    from the first 1024 characters. When the sniffer cannot determine a
    delimiter (e.g. an empty file or a file with a single column), the
    default `csv.excel` dialect (comma-separated) is used instead of failing.

    Parameters
    ----------
    p: Path
        Path to the metadata file. The first row is used as the header.

    key_id: str
        Name of the column whose value identifies each row. Default is
        `"image"`.

    key_proc_fn: Optional[Callable]
        Function applied to each `key_id` value before it is used as a key
        of the output dictionary. Default is `None` (value used as is).

    Returns
    -------
    metadata: dict[str, dict[str, str]]
        Maps each (processed) key to a dictionary with the remaining columns
        of its row. If a key appears more than once, the last row wins.

    Raises
    ------
    KeyError
        If the file has data rows but no `key_id` column.
    """
    with open(p, newline="") as csvfile:
        try:
            dialect = csv.Sniffer().sniff(csvfile.read(1024))
        except csv.Error:
            # The sniffer gives up when no delimiter stands out; fall back to
            # plain comma-separated parsing so such files remain loadable.
            dialect = csv.excel
        # Rewind: the sample consumed by the sniffer includes the header row.
        csvfile.seek(0)
        rows = list(csv.DictReader(csvfile, dialect=dialect))

    metadata = {}
    for row in rows:
        key = row[key_id]
        if key_proc_fn is not None:
            key = key_proc_fn(key)
        metadata[key] = {k: v for k, v in row.items() if k != key_id}

    return metadata
|
||||
|
||||
|
||||
def load_dataset(
|
||||
root: Path,
|
||||
meta_path: Optional[Path] = None,
|
||||
imname_proc_fn: Optional[Callable]=None
|
||||
) -> tuple[dict[Path, np.ndarray], dict[str, dict[str, Any]]]:
|
||||
) -> tuple[dict[Path, np.ndarray], Optional[dict[str, dict[str, Any]]]]:
|
||||
"""
|
||||
if `meta_path` is `None`, we won't attempt to read it.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
root: Path
|
||||
Root path to the images. We assume that the images are positioned in a
|
||||
flat directory under `root`.
|
||||
|
||||
meta_path: Optional[Path]
|
||||
Path to the metadata file on the dataset. Default is `None`.
|
||||
|
||||
imname_proc_fn: Optional[Callable]
|
||||
Function to apply to the image filenames when building the output
|
||||
dictionary. Use it to uniformize it with the metadata file. Default
|
||||
is `None`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
data: dict[Path, np.ndarray]
|
||||
metadata: Optional[dict[str, dict[str, Any]]]
|
||||
"""
|
||||
metadata = dict()
|
||||
paths = set([p for p in root.iterdir() if not p.is_dir()])
|
||||
if meta_path is not None and meta_path in paths:
|
||||
@@ -43,16 +82,16 @@ def load_dataset(
|
||||
try:
|
||||
im = cv2.cvtColor(cv2.imread(p), cv2.COLOR_BGR2RGB)
|
||||
except cv2.error:
|
||||
logger.info(f'File "{p}" is not an image.')
|
||||
if meta_path is None:
|
||||
logger.info(f'Trying to read "{p}" as metadata.')
|
||||
if p.suffix == ".csv":
|
||||
metadata = load_metadata(p)
|
||||
logger.info("Metadata read successfully.")
|
||||
else:
|
||||
logger.error(f'Failed to read "{p}" as metadata. Skipping.')
|
||||
elif not metadata:
|
||||
logger.critical("Logic error: Metadata should have been read already.")
|
||||
logger.info(f'File "{p}" is not an image. Skipping.')
|
||||
# if meta_path is None:
|
||||
# logger.info(f'Trying to read "{p}" as metadata.')
|
||||
# if p.suffix == ".csv":
|
||||
# metadata = load_metadata(p)
|
||||
# logger.info("Metadata read successfully.")
|
||||
# else:
|
||||
# logger.error(f'Failed to read "{p}" as metadata. Skipping.')
|
||||
# elif not metadata:
|
||||
# logger.critical("Logic error: Metadata should have been read already.")
|
||||
else:
|
||||
proc_imname = imname_proc_fn(p.name) if imname_proc_fn is not None else p.name
|
||||
ims[proc_imname] = im
|
||||
|
||||
Reference in New Issue
Block a user