Data Wrangling - Aggregation on Multiple Columns

Data Wrangling - Aggregation on Multiple Columns#

Complete Solution#

# import libraries
from collections import defaultdict

# iterate to pair prefix with relevant columns
mapping = defaultdict(list)
for entry in df.names[1:]:
    key = f"total_{entry[:2]}"
    mapping[key].append(entry)

mapping
defaultdict(list,
            {'total_C1': ['C1-1', 'C1-2', 'C1-3'],
             'total_H2': ['H2-1', 'H2-2'],
             'total_K3': ['K3-1', 'K3-2']})
mapping = [f[value].rowsum().alias(key) for key, value in mapping.items()]

mapping
[FExpr<alias(rowsum(f[['C1-1', 'C1-2', 'C1-3']]), [total_C1,])>,
 FExpr<alias(rowsum(f[['H2-1', 'H2-2']]), [total_H2,])>,
 FExpr<alias(rowsum(f[['K3-1', 'K3-2']]), [total_K3,])>]
# actual computation occurs here
df[:, f.sn.extend(mapping)]
sntotal_C1total_H2total_K3
▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪
011256
12423
23323