From f9798b9cf885b30c1cf10a804cac988e930af984 Mon Sep 17 00:00:00 2001 From: Guilherme Schardong Date: Fri, 29 May 2026 15:51:30 +0100 Subject: [PATCH] Adding some missing files from the last commits. --- .gitignore | 5 +- README.md | 6 + src/facebias/__init__.py | 167 ++++++++++++++++++++++- src/facebias/estimators/mivolo/README.md | 3 + 4 files changed, 175 insertions(+), 6 deletions(-) create mode 100644 README.md create mode 100644 src/facebias/estimators/mivolo/README.md diff --git a/.gitignore b/.gitignore index b95055f..5db814b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,7 @@ .vscode *.egg-info landmark_models -*__pycache__/ \ No newline at end of file +*__pycache__/ +data/* +models/* +gpt*.py \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..840e35d --- /dev/null +++ b/README.md @@ -0,0 +1,6 @@ +# ISR Face Dataset/Model Bias Check API + +We use [FairFace](https://github.com/dchen236/FairFace) and the version 1, face-only checkpoint of [MiVOLO](https://github.com/wildchlamydia/mivolo). + +## Dataset +Download the tar file `VISTEAM-NAS/Public_Data/facing2-skin-tone-train-images.tar.bz2` to the `data` directory, and extract it. This dataset is balanced with respect to sex and skin tone, but unbalanced with respect to age. 
diff --git a/src/facebias/__init__.py b/src/facebias/__init__.py index 3b5982f..a2299e7 100644 --- a/src/facebias/__init__.py +++ b/src/facebias/__init__.py @@ -23,7 +23,10 @@ class FaceBox: # TODO(gschardong): Convert all CSV reading functions to pandas -def load_metadata(p: Path, key_id="image", key_proc_fn=None) -> dict[str, dict[str, str]]: + +def load_metadata( + p: Path, key_id="image", key_proc_fn=None +) -> dict[str, dict[str, str]]: lines = [] with open(p, newline="") as csvfile: dialect = csv.Sniffer().sniff(csvfile.read(1024)) @@ -41,9 +44,7 @@ def load_metadata(p: Path, key_id="image", key_proc_fn=None) -> dict[str, dict[s def load_dataset( - root: Path, - meta_path: Path | None, - imname_proc_fn: Callable |None + root: Path, meta_path: Path | None, imname_proc_fn: Callable | None ) -> tuple[dict[str, np.ndarray], dict[str, dict[str, Any]] | None]: """ if `meta_path` is `None`, we won't attempt to read it. @@ -83,7 +84,9 @@ def load_dataset( except cv2.error: logger.info(f'File "{p}" is not an image. Skipping.') else: - proc_imname = imname_proc_fn(p.name) if imname_proc_fn is not None else str(p.name) + proc_imname = ( + imname_proc_fn(p.name) if imname_proc_fn is not None else str(p.name) + ) ims[proc_imname] = im if not metadata: @@ -92,3 +95,157 @@ def load_dataset( logger.error(f'Metadata file not found at "{meta_path}".') return ims, metadata + + +# def calc_model_performance( +# gt: pd.DataFrame, +# preds: pd.DataFrame, +# keys: list[str] | None = None, +# possible_caps: dict[Capability, Any] | None = None, +# ) -> pd.DataFrame: +# """ +# We assume that both `gt` and `preds` have the same structure. They should +# be indexed by individual ID, such as the image name, and each value is a +# dictionary with model prediction capabilities as keys (e.g., "age_group", +# "sex", "skin-color", etc.), and the values are the predictions, or ground-truth +# values for each ID/capability. 
+ +# if `keys` is empty, then we infer from common keys present in `preds` and `gt`. + +# Parameters +# ---------- +# gt: pd.DataFrame +# preds: pd.DataFrame +# keys: list[str] | None + +# Returns +# ------- +# metrics: pd.DataFrame +# """ +# common_caps = keys +# if keys is None: +# common_caps = set(gt.columns) & set(preds.columns) +# if not common_caps: +# logger.error( +# f'No common capabilities found. Predictions has "{preds.columns}",' +# f' ground-truth has "{gt.columns}".' +# ) +# return None + +# # Finding common images between predictions and ground-truth. +# common_inds = set(preds.index) & set(gt.index) +# if not common_inds: +# logger.error("No common images found between predictions and ground-truth.") +# return None + +# metric_vals = dict() +# for cap in common_caps: +# if isinstance(preds[cap].iloc[0], (float, int)): +# metric_vals[cap] = { +# "mean_absolute_error": mean_absolute_error(gt[cap], preds[cap]), +# "max_error": max_error(gt[cap], preds[cap]), +# } +# else: +# labels = possible_caps[cap] +# if possible_caps is None: +# labels = sorted(list(set(preds[cap].unique()) | set(gt[cap].unique()))) +# metric_vals[cap] = { +# "accuracy": accuracy_score(gt[cap], preds[cap]), +# "balanced_accuracy": balanced_accuracy_score(gt[cap], preds[cap]), +# "cohen-kappa": cohen_kappa_score(gt[cap], preds[cap], labels=labels), +# } + +# return pd.DataFrame.from_dict(metric_vals) + + +# def calc_metrics_per_subgroup( +# gt: pd.DataFrame, +# preds: pd.DataFrame, +# model_cls: BaseEstimator, +# metrics: list[str] = [ +# "accuracy", +# ], +# ) -> pd.DataFrame: +# """Calculate performance metrics per sub-group for each capability. 
+ +# Parameters +# ---------- +# gt: pd.DataFrame + +# preds: pd.DataFrame + +# model_cls: BaseEstimator-derived class + +# Returns +# ------- +# metrics: dict[Capability, dict[Any, dict]] +# """ +# common_caps = set(gt.columns) & set(preds.columns) + +# # metrics = {} +# index = sorted( +# [ +# model_cls.possible_capability_values(c) +# for c in common_caps +# if c != Capability.AGE +# ], +# key=len, +# ) +# index = product(*index) + +# metrics = ["accuracy", ""] +# df = pd.DataFrame(index=index, columns=metrics) + +# for cap in common_caps: +# # TODO(gschardong): Better to store the "type" of each capability +# # somewhere and test all numeric types here. +# if cap == Capability.AGE: +# continue + +# other_caps = common_caps - set([cap]) + +# # TODO(gschardong): Do we only need the values that occur in the data, +# # or all possible values? If the first is true, then we need to fetch +# # from the model class itself, else, we keep it as is. +# unique_values_cap = set(gt[cap].unique()) | set(preds[cap].unique()) + +# metrics[cap] = {} +# for val in unique_values_cap: +# ids = gt.index[gt[cap] == val] +# metrics[cap][val] = {"number_of_elements": len(ids)} +# for ocap in other_caps: +# metrics[cap][val][ocap] = {} +# fpred_data = preds[ocap][ids] +# fgt_data = gt[ocap][ids] + +# if isinstance(fpred_data[0], (float, int)): +# # If data is numeric, we calculate regression-based metrics +# metrics[cap][val][ocap] = { +# "mean_absolute_error": mean_absolute_error( +# fgt_data, fpred_data +# ), +# "max_error": max_error(fgt_data, fpred_data), +# } +# else: +# unique_values_ocap = sorted( +# list(set(gt[ocap].unique()) | set(preds[ocap].unique())) +# ) +# unique_values_ocap = np.array(unique_values_ocap) + +# metrics_small = {} +# # if len(fgt_data.unique()) == 2: +# # cm = confusion_matrix(fgt_data, fpred_data, labels=unique_values_ocap) +# # else: +# cm = multilabel_confusion_matrix( +# fgt_data, fpred_data, labels=unique_values_ocap +# ) +# for m, oval in zip(cm, 
unique_values_ocap): +# # metrics[cap][val][ocap][oval] = m +# metrics_small[oval] = m +# return m + +# metrics[cap][val][ocap] = { +# "accuracy": accuracy_score(fgt_data, fpred_data), +# } + +# return metrics diff --git a/src/facebias/estimators/mivolo/README.md b/src/facebias/estimators/mivolo/README.md new file mode 100644 index 0000000..8d6368b --- /dev/null +++ b/src/facebias/estimators/mivolo/README.md @@ -0,0 +1,3 @@ +# Subset of MiVOLO code + +This is a subset of the [MiVOLO](https://github.com/wildchlamydia/mivolo) code necessary to instantiate the face-only attribute model. \ No newline at end of file