Adding some missing files from the last commits.
This commit is contained in:
5
.gitignore
vendored
5
.gitignore
vendored
@@ -2,4 +2,7 @@
|
||||
.vscode
|
||||
*.egg-info
|
||||
landmark_models
|
||||
*__pycache__/
|
||||
*__pycache__/
|
||||
data/*
|
||||
models/*
|
||||
gpt*.py
|
||||
6
README.md
Normal file
6
README.md
Normal file
@@ -0,0 +1,6 @@
|
||||
# ISR Face Dataset/Model Bias Check API
|
||||
|
||||
We use [FairFace](https://github.com/dchen236/FairFace) and the version 1, face-only checkpoint of [MiVOLO](https://github.com/wildchlamydia/mivolo).
|
||||
|
||||
## Dataset
|
||||
Download the tar file `VISTEAM-NAS/Public_Data/facing2-skin-tone-train-images.tar.bz2` to the `data` directory and extract it. The dataset is balanced with respect to sex and skin tone, but unbalanced with respect to age.
|
||||
@@ -23,7 +23,10 @@ class FaceBox:
|
||||
|
||||
# TODO(gschardong): Convert all CSV reading functions to pandas
|
||||
|
||||
def load_metadata(p: Path, key_id="image", key_proc_fn=None) -> dict[str, dict[str, str]]:
|
||||
|
||||
def load_metadata(
|
||||
p: Path, key_id="image", key_proc_fn=None
|
||||
) -> dict[str, dict[str, str]]:
|
||||
lines = []
|
||||
with open(p, newline="") as csvfile:
|
||||
dialect = csv.Sniffer().sniff(csvfile.read(1024))
|
||||
@@ -41,9 +44,7 @@ def load_metadata(p: Path, key_id="image", key_proc_fn=None) -> dict[str, dict[s
|
||||
|
||||
|
||||
def load_dataset(
|
||||
root: Path,
|
||||
meta_path: Path | None,
|
||||
imname_proc_fn: Callable |None
|
||||
root: Path, meta_path: Path | None, imname_proc_fn: Callable | None
|
||||
) -> tuple[dict[str, np.ndarray], dict[str, dict[str, Any]] | None]:
|
||||
"""
|
||||
If `meta_path` is `None`, we won't attempt to read it.
|
||||
@@ -83,7 +84,9 @@ def load_dataset(
|
||||
except cv2.error:
|
||||
logger.info(f'File "{p}" is not an image. Skipping.')
|
||||
else:
|
||||
proc_imname = imname_proc_fn(p.name) if imname_proc_fn is not None else str(p.name)
|
||||
proc_imname = (
|
||||
imname_proc_fn(p.name) if imname_proc_fn is not None else str(p.name)
|
||||
)
|
||||
ims[proc_imname] = im
|
||||
|
||||
if not metadata:
|
||||
@@ -92,3 +95,157 @@ def load_dataset(
|
||||
logger.error(f'Metadata file not found at "{meta_path}".')
|
||||
|
||||
return ims, metadata
|
||||
|
||||
|
||||
# def calc_model_performance(
|
||||
# gt: pd.DataFrame,
|
||||
# preds: pd.DataFrame,
|
||||
# keys: list[str] | None = None,
|
||||
# possible_caps: dict[Capability, Any] | None = None,
|
||||
# ) -> pd.DataFrame:
|
||||
# """
|
||||
# We assume that both `gt` and `preds` have the same structure. They should
|
||||
# be indexed by individual ID, such as the image name, and each value is a
|
||||
# dictionary with model prediction capabilities as keys (e.g., "age_group",
|
||||
# "sex", "skin-color", etc.), and the values are the predictions, or ground-truth
|
||||
# values for each ID/capability.
|
||||
|
||||
# If `keys` is `None`, we infer it from the columns common to `preds` and `gt`.
|
||||
|
||||
# Parameters
|
||||
# ----------
|
||||
# gt: pd.DataFrame
|
||||
# preds: pd.DataFrame
|
||||
# keys: list[str] | None
|
||||
|
||||
# Returns
|
||||
# -------
|
||||
# metrics: pd.DataFrame
|
||||
# """
|
||||
# common_caps = keys
|
||||
# if keys is None:
|
||||
# common_caps = set(gt.columns) & set(preds.columns)
|
||||
# if not common_caps:
|
||||
# logger.error(
|
||||
# f'No common capabilities found. Predictions has "{preds.columns}",'
|
||||
# f' ground-truth has "{gt.columns}".'
|
||||
# )
|
||||
# return None
|
||||
|
||||
# # Finding common images between predictions and ground-truth.
|
||||
# common_inds = set(preds.index) & set(gt.index)
|
||||
# if not common_inds:
|
||||
# logger.error("No common images found between predictions and ground-truth.")
|
||||
# return None
|
||||
|
||||
# metric_vals = dict()
|
||||
# for cap in common_caps:
|
||||
# if isinstance(preds[cap].iloc[0], (float, int)):
|
||||
# metric_vals[cap] = {
|
||||
# "mean_absolute_error": mean_absolute_error(gt[cap], preds[cap]),
|
||||
# "max_error": max_error(gt[cap], preds[cap]),
|
||||
# }
|
||||
# else:
|
||||
# labels = possible_caps[cap]
|
||||
# if possible_caps is None:
|
||||
# labels = sorted(list(set(preds[cap].unique()) | set(gt[cap].unique())))
|
||||
# metric_vals[cap] = {
|
||||
# "accuracy": accuracy_score(gt[cap], preds[cap]),
|
||||
# "balanced_accuracy": balanced_accuracy_score(gt[cap], preds[cap]),
|
||||
# "cohen-kappa": cohen_kappa_score(gt[cap], preds[cap], labels=labels),
|
||||
# }
|
||||
|
||||
# return pd.DataFrame.from_dict(metric_vals)
|
||||
|
||||
|
||||
# def calc_metrics_per_subgroup(
|
||||
# gt: pd.DataFrame,
|
||||
# preds: pd.DataFrame,
|
||||
# model_cls: BaseEstimator,
|
||||
# metrics: list[str] = [
|
||||
# "accuracy",
|
||||
# ],
|
||||
# ) -> pd.DataFrame:
|
||||
# """Calculate performance metrics per sub-group for each capability.
|
||||
|
||||
# Parameters
|
||||
# ----------
|
||||
# gt: pd.DataFrame
|
||||
|
||||
# preds: pd.DataFrame
|
||||
|
||||
# model_cls: BaseEstimator-derived class
|
||||
|
||||
# Returns
|
||||
# -------
|
||||
# metrics: dict[Capability, dict[Any, dict]]
|
||||
# """
|
||||
# common_caps = set(gt.columns) & set(preds.columns)
|
||||
|
||||
# # metrics = {}
|
||||
# index = sorted(
|
||||
# [
|
||||
# model_cls.possible_capability_values(c)
|
||||
# for c in common_caps
|
||||
# if c != Capability.AGE
|
||||
# ],
|
||||
# key=len,
|
||||
# )
|
||||
# index = product(*index)
|
||||
|
||||
# metrics = ["accuracy", ""]
|
||||
# df = pd.DataFrame(index=index, columns=metrics)
|
||||
|
||||
# for cap in common_caps:
|
||||
# # TODO(gschardong): Better to store the "type" of each capability
|
||||
# # somewhere and test all numeric types here.
|
||||
# if cap == Capability.AGE:
|
||||
# continue
|
||||
|
||||
# other_caps = common_caps - set([cap])
|
||||
|
||||
# # TODO(gschardong): Do we only need the values that occur in the data,
|
||||
# or all possible values? If the latter is true, then we need to fetch
|
||||
# # from the model class itself, else, we keep it as is.
|
||||
# unique_values_cap = set(gt[cap].unique()) | set(preds[cap].unique())
|
||||
|
||||
# metrics[cap] = {}
|
||||
# for val in unique_values_cap:
|
||||
# ids = gt.index[gt[cap] == val]
|
||||
# metrics[cap][val] = {"number_of_elements": len(ids)}
|
||||
# for ocap in other_caps:
|
||||
# metrics[cap][val][ocap] = {}
|
||||
# fpred_data = preds[ocap][ids]
|
||||
# fgt_data = gt[ocap][ids]
|
||||
|
||||
# if isinstance(fpred_data[0], (float, int)):
|
||||
# # If data is numeric, we calculate regression-based metrics
|
||||
# metrics[cap][val][ocap] = {
|
||||
# "mean_absolute_error": mean_absolute_error(
|
||||
# fgt_data, fpred_data
|
||||
# ),
|
||||
# "max_error": max_error(fgt_data, fpred_data),
|
||||
# }
|
||||
# else:
|
||||
# unique_values_ocap = sorted(
|
||||
# list(set(gt[ocap].unique()) | set(preds[ocap].unique()))
|
||||
# )
|
||||
# unique_values_ocap = np.array(unique_values_ocap)
|
||||
|
||||
# metrics_small = {}
|
||||
# # if len(fgt_data.unique()) == 2:
|
||||
# # cm = confusion_matrix(fgt_data, fpred_data, labels=unique_values_ocap)
|
||||
# # else:
|
||||
# cm = multilabel_confusion_matrix(
|
||||
# fgt_data, fpred_data, labels=unique_values_ocap
|
||||
# )
|
||||
# for m, oval in zip(cm, unique_values_ocap):
|
||||
# # metrics[cap][val][ocap][oval] = m
|
||||
# metrics_small[oval] = m
|
||||
# return m
|
||||
|
||||
# metrics[cap][val][ocap] = {
|
||||
# "accuracy": accuracy_score(fgt_data, fpred_data),
|
||||
# }
|
||||
|
||||
# return metrics
|
||||
|
||||
3
src/facebias/estimators/mivolo/README.md
Normal file
3
src/facebias/estimators/mivolo/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Subset of MiVOLO code
|
||||
|
||||
This is a subset of the [MiVOLO](https://github.com/wildchlamydia/mivolo) code necessary to instantiate the face-only attribute model.
|
||||
Reference in New Issue
Block a user