{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Wrangling - Aggregation on Multiple Columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### [Link to Source data](https://stackoverflow.com/questions/64903216/append-new-column-to-df-after-sum)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Task: Sum the columns based on the prefix of the individual columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style type='text/css'>\n",
       ".datatable table.frame { margin-bottom: 0; }\n",
       ".datatable table.frame thead { border-bottom: none; }\n",
       ".datatable table.frame tr.coltypes td {  color: #FFFFFF;  line-height: 6px;  padding: 0 0.5em;}\n",
       ".datatable .bool    { background: #DDDD99; }\n",
       ".datatable .object  { background: #565656; }\n",
       ".datatable .int     { background: #5D9E5D; }\n",
       ".datatable .float   { background: #4040CC; }\n",
       ".datatable .str     { background: #CC4040; }\n",
       ".datatable .time    { background: #40CC40; }\n",
       ".datatable .row_index {  background: var(--jp-border-color3);  border-right: 1px solid var(--jp-border-color0);  color: var(--jp-ui-font-color3);  font-size: 9px;}\n",
       ".datatable .frame tbody td { text-align: left; }\n",
       ".datatable .frame tr.coltypes .row_index {  background: var(--jp-border-color0);}\n",
       ".datatable th:nth-child(2) { padding-left: 12px; }\n",
       ".datatable .hellipsis {  color: var(--jp-cell-editor-border-color);}\n",
       ".datatable .vellipsis {  background: var(--jp-layout-color0);  color: var(--jp-cell-editor-border-color);}\n",
       ".datatable .na {  color: var(--jp-cell-editor-border-color);  font-size: 80%;}\n",
       ".datatable .sp {  opacity: 0.25;}\n",
       ".datatable .footer { font-size: 9px; }\n",
       ".datatable .frame_dimensions {  background: var(--jp-border-color3);  border-top: 1px solid var(--jp-border-color0);  color: var(--jp-ui-font-color3);  display: inline-block;  opacity: 0.6;  padding: 1px 10px 1px 5px;}\n",
       "</style>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div class='datatable'>\n",
       "  <table class='frame'>\n",
       "  <thead>\n",
       "    <tr class='colnames'><td class='row_index'></td><th>sn</th><th>C1-1</th><th>C1-2</th><th>C1-3</th><th>H2-1</th><th>H2-2</th><th>K3-1</th><th>K3-2</th></tr>\n",
       "    <tr class='coltypes'><td class='row_index'></td><td class='int' title='int32'>&#x25AA;&#x25AA;&#x25AA;&#x25AA;</td><td class='int' title='int32'>&#x25AA;&#x25AA;&#x25AA;&#x25AA;</td><td class='int' title='int32'>&#x25AA;&#x25AA;&#x25AA;&#x25AA;</td><td class='int' title='int32'>&#x25AA;&#x25AA;&#x25AA;&#x25AA;</td><td class='int' title='int32'>&#x25AA;&#x25AA;&#x25AA;&#x25AA;</td><td class='int' title='int32'>&#x25AA;&#x25AA;&#x25AA;&#x25AA;</td><td class='int' title='int32'>&#x25AA;&#x25AA;&#x25AA;&#x25AA;</td><td class='int' title='int32'>&#x25AA;&#x25AA;&#x25AA;&#x25AA;</td></tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr><td class='row_index'>0</td><td>1</td><td>4</td><td>3</td><td>5</td><td>4</td><td>1</td><td>4</td><td>2</td></tr>\n",
       "    <tr><td class='row_index'>1</td><td>2</td><td>2</td><td>2</td><td>0</td><td>2</td><td>0</td><td>1</td><td>2</td></tr>\n",
       "    <tr><td class='row_index'>2</td><td>3</td><td>1</td><td>2</td><td>0</td><td>0</td><td>2</td><td>1</td><td>2</td></tr>\n",
       "  </tbody>\n",
       "  </table>\n",
       "  <div class='footer'>\n",
       "    <div class='frame_dimensions'>3 rows &times; 8 columns</div>\n",
       "  </div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "<Frame#7f07a84e3660 3x8>"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from datatable import dt, f\n",
    "\n",
    "df = dt.Frame(\n",
    "    {\n",
    "        \"sn\": [1, 2, 3],\n",
    "        \"C1-1\": [4, 2, 1],\n",
    "        \"C1-2\": [3, 2, 2],\n",
    "        \"C1-3\": [5, 0, 0],\n",
    "        \"H2-1\": [4, 2, 0],\n",
    "        \"H2-2\": [1, 0, 2],\n",
    "        \"K3-1\": [4, 1, 1],\n",
    "        \"K3-2\": [2, 2, 2],\n",
    "    }\n",
    ")\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## **Complete Solution**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "defaultdict(list,\n",
       "            {'total_C1': ['C1-1', 'C1-2', 'C1-3'],\n",
       "             'total_H2': ['H2-1', 'H2-2'],\n",
       "             'total_K3': ['K3-1', 'K3-2']})"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# import libraries\n",
    "from collections import defaultdict\n",
    "\n",
    "# iterate to pair prefix with relevant columns\n",
    "mapping = defaultdict(list)\n",
    "for entry in df.names[1:]:\n",
    "    key = f\"total_{entry[:2]}\"\n",
    "    mapping[key].append(entry)\n",
    "\n",
    "mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[FExpr<alias(rowsum(f[['C1-1', 'C1-2', 'C1-3']]), [total_C1,])>,\n",
       " FExpr<alias(rowsum(f[['H2-1', 'H2-2']]), [total_H2,])>,\n",
       " FExpr<alias(rowsum(f[['K3-1', 'K3-2']]), [total_K3,])>]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mapping = [f[value].rowsum().alias(key) for key, value in mapping.items()]\n",
    "\n",
    "mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div class='datatable'>\n",
       "  <table class='frame'>\n",
       "  <thead>\n",
       "    <tr class='colnames'><td class='row_index'></td><th>sn</th><th>total_C1</th><th>total_H2</th><th>total_K3</th></tr>\n",
       "    <tr class='coltypes'><td class='row_index'></td><td class='int' title='int32'>&#x25AA;&#x25AA;&#x25AA;&#x25AA;</td><td class='int' title='int32'>&#x25AA;&#x25AA;&#x25AA;&#x25AA;</td><td class='int' title='int32'>&#x25AA;&#x25AA;&#x25AA;&#x25AA;</td><td class='int' title='int32'>&#x25AA;&#x25AA;&#x25AA;&#x25AA;</td></tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr><td class='row_index'>0</td><td>1</td><td>12</td><td>5</td><td>6</td></tr>\n",
       "    <tr><td class='row_index'>1</td><td>2</td><td>4</td><td>2</td><td>3</td></tr>\n",
       "    <tr><td class='row_index'>2</td><td>3</td><td>3</td><td>2</td><td>3</td></tr>\n",
       "  </tbody>\n",
       "  </table>\n",
       "  <div class='footer'>\n",
       "    <div class='frame_dimensions'>3 rows &times; 4 columns</div>\n",
       "  </div>\n",
       "</div>\n"
      ],
      "text/plain": [
       "<Frame#7f07a84e33f0 3x4>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# actual computation occurs here\n",
    "df[:, f.sn.extend(mapping)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Comments\n",
    "<script src=\"https://utteranc.es/client.js\"\n",
    "        repo=\"samukweku/data-wrangling-blog\"\n",
    "        issue-term=\"title\"\n",
    "        theme=\"github-light\"\n",
    "        crossorigin=\"anonymous\"\n",
    "        async>\n",
    "</script>"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}