# -*- coding: utf-8 -*-

"""Dataset/model evaluation functions."""

import logging
from itertools import permutations, combinations
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.stats import entropy

from facebias.estimators import Capability
from facebias.metrics import gini

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("facebias:evaluation.py")


# TODO(gschardong): Move to the same file as `load_dataset`
def _to_age_bracket(row):
    """Return the age-bracket label for a single metadata row.

    Parameters
    ----------
    row : mapping-like
        Must provide an ``"age"`` entry that ``int()`` can convert (e.g. a
        row handed over by ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    str
        ``"00-02"`` for ages below 3, ``"03-09"`` for ages below 10,
        ``"70+"`` for ages above 69, and the enclosing decade
        (``"10-19"``, ``"20-29"``, ...) otherwise.
    """
    iage = int(row["age"])
    if iage < 3:
        return "00-02"
    if iage < 10:
        return "03-09"
    if iage > 69:
        return "70+"

    decade = iage // 10 * 10
    return f"{decade}-{decade + 9}"


if __name__ == "__main__":
    import os

    logger.info(os.getcwd())

    DATASET_PATH = Path("../../data/facing2-train/")
    METADATA_PATH = DATASET_PATH / "meta-w-age.csv"

    # NOTE(review): `Capability` lives in facebias.estimators and is not
    # visible from this file. Column names are always taken from `.value`
    # below so that the script does not depend on the enum members behaving
    # like plain strings (concatenation, DataFrame lookups, f-string output,
    # the latter of which differs across Python versions for mixed-in enums).
    agegroup_col = Capability.AGEGROUP.value
    sex_col = Capability.SEX.value
    agegroup_cat_col = agegroup_col + "_cat"
    sex_cat_col = sex_col + "_cat"

    meta = pd.read_csv(METADATA_PATH, sep=",", index_col="image")
    meta[agegroup_col] = meta.apply(_to_age_bracket, axis=1)
    meta = meta.sort_index()

    # Integer codes for the categorical columns; sort=True makes the codes
    # follow the sorted order of the labels.
    agegroup_codes, _ = pd.factorize(meta[agegroup_col], sort=True)
    meta[agegroup_cat_col] = agegroup_codes
    sex_codes, _ = pd.factorize(meta[sex_col], sort=True)
    meta[sex_cat_col] = sex_codes

    # GINI IMPURITY
    # Lower values mean a concentration of values around a single class,
    # i.e. bias.
    age_gini = gini(meta["age"])
    agegroup_gini = gini(meta[agegroup_cat_col])

    # Should be close to 0.5, indicating a 50/50 split of males and females,
    # representing maximum uncertainty.
    sex_gini = gini(meta[sex_cat_col])

    # SHANNON'S ENTROPY
    count_per_agegroup = meta["age_group"].value_counts()
    prob_per_agegroup = count_per_agegroup / count_per_agegroup.sum()
    H_agegroup = entropy(prob_per_agegroup)

    count_per_sex = meta["sex"].value_counts()
    prob_per_sex = count_per_sex / count_per_sex.sum()
    H_sex = entropy(prob_per_sex)

    # Now, onto the subgroup metrics.
    # The goal is to be able to answer the following types of questions:
    # 1) How many women are in each age-bracket?
    # 2) Given the population in age-bracket 20-49 years, how is their
    #    gender distribution?
    # 3) Do we need to collect more images of new individuals? If so, what
    #    population should we focus on?
    sex_gb = meta.groupby(sex_col)[["age_group_cat"]]
    agegroup_gb = meta.groupby(agegroup_col)[["sex_cat"]]

    gini_per_sex = sex_gb.apply(gini)
    gini_per_agegroup = agegroup_gb.apply(gini)

    # Prototype textual description of the dataset. To be incorporated into a
    # "generate_report" function.
    print(
        f'The dataset "{DATASET_PATH.name}" has a total of {len(meta)} {meta.index.name}s,'
        " with the following features/capabilities:"
    )
    # Only capabilities that actually have a column in the metadata are
    # reported on.
    caps = [c for c in Capability if c.value in meta]
    for c in caps:
        print(f"- {c.value}")

    print("\nEach feature/capability has the following types and values:")
    for c in caps:
        if c == Capability.AGE:
            print(f"{c.value}: numeric")
        else:
            print(f"{c.value}: categorical")
            print(f" - {sorted(meta[c.value].unique())}")

    print("\nData distribution statistics.")
    for c in caps:
        print(
            f'The feature/capability "{c.value}" has the following distribution of values:'
        )
        column = meta[c.value]
        if c == Capability.AGE:
            m1 = column.min()
            m2 = column.max()
            mean = column.mean()
            std = column.std()
            p25 = column.quantile(0.25)
            median = column.median()
            p75 = column.quantile(0.75)

            print(
                f" - min = {m1}, max = {m2}, mean = {mean:.2f}, std = {std:.2f},"
                f" p25 = {p25}, p50 = {median}, p75 = {p75}"
            )
            # Widths of the four segments delimited by min, the quartiles
            # and max.
            print(" Interquartile ranges:")
            print(f" - p25-min = {p25 - m1}")
            print(f" - p50-p25 = {median - p25}")
            print(f" - p75-p50 = {p75 - median}")
            print(f" - max-p75 = {m2 - p75}")
        else:
            series = column.value_counts().sort_index()
            for label, count in series.items():
                print(f" - {label}: {count}")

    print("\nPer capability/class data distribution statistics.")
    for c1, c2 in combinations(caps, 2):
        # The numeric age column is skipped on either side of the pair; only
        # categorical-vs-categorical cross tabulations are printed.
        if Capability.AGE in (c1, c2):
            continue

        gb = meta.groupby(c1.value)[[c2.value]]
        print(
            f'Grouping by "{c1.value}", the dataset has the following data distribution for "{c2.value}"'
        )
        # Rows: values of c2; columns: values of c1; missing pairs count as 0.
        print(gb.value_counts().sort_index().unstack(level=c1.value).fillna(0))

    # Diagnostics of biases in the dataset. To be incorporated into a
    # "generate_diagnostics" function later on.