{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Reindexing Series & DataFrame" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from pandas import Series, DataFrame" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Series reindex" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "A 1\n", "B 2\n", "C 3\n", "D 4\n", "dtype: int64" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s1 = pd.Series([1, 2, 3, 4], index=['A', 'B', 'C', 'D'])\n", "s1" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "A 1.0\n", "B 2.0\n", "C 3.0\n", "D 4.0\n", "E NaN\n", "dtype: float64" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Hold Shift + Tab to view the help\n", "s1.reindex(index=['A', 'B', 'C', 'D', 'E'])" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "A 1\n", "B 2\n", "C 3\n", "D 4\n", "E 10\n", "dtype: int64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s1.reindex(index=['A', 'B', 'C', 'D', 'E'], fill_value=10)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1 A\n", "3 B\n", "5 C\n", "dtype: object" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s2 = pd.Series(['A', 'B', 'C'], index=[1, 3, 5])\n", "s2" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 NaN\n", "1 A\n", "2 NaN\n", "3 B\n", "4 NaN\n", "5 C\n", "6 NaN\n", "dtype: object" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s2.reindex(index=range(7))" ] }, { "cell_type": 
"code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 NaN\n", "1 A\n", "2 A\n", "3 B\n", "4 B\n", "5 C\n", "6 C\n", "dtype: object" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# method='ffill' forward-fills: a label that would be NaN takes the value of the nearest preceding existing label\n", "s2.reindex(index=range(7), method='ffill')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## DataFrame reindex" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
c1c2c3c4c5
A0.8611220.2664800.6997600.9408920.068831
B0.2490550.5427530.6069750.9618070.598670
D0.7015710.9713560.6296730.3196230.747501
E0.1925070.1251780.5985550.2541210.455577
F0.9820850.5213120.9063480.9123760.352556
\n", "
" ], "text/plain": [ " c1 c2 c3 c4 c5\n", "A 0.861122 0.266480 0.699760 0.940892 0.068831\n", "B 0.249055 0.542753 0.606975 0.961807 0.598670\n", "D 0.701571 0.971356 0.629673 0.319623 0.747501\n", "E 0.192507 0.125178 0.598555 0.254121 0.455577\n", "F 0.982085 0.521312 0.906348 0.912376 0.352556" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = pd.DataFrame(np.random.rand(25).reshape([5, 5]), index=['A', 'B', 'D', 'E', 'F'], columns=['c1', 'c2', 'c3', 'c4', 'c5'])\n", "df1" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
<div>\n", "<style scoped>\n", "    .dataframe tbody tr th:only-of-type {\n", "        vertical-align: middle;\n", "    }\n", "\n", "    .dataframe tbody tr th {\n", "        vertical-align: top;\n", "    }\n", "\n", "    .dataframe thead th {\n", "        text-align: right;\n", "    }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", "  <thead>\n", "    <tr style=\"text-align: right;\">\n", "      <th></th>\n", "      <th>c1</th>\n", "      <th>c2</th>\n", "      <th>c3</th>\n", "      <th>c4</th>\n", "      <th>c5</th>\n", "      <th>c6</th>\n", "    </tr>\n", "  </thead>\n", "  <tbody>\n",
"    <tr>\n", "      <th>A</th>\n", "      <td>0.861122</td>\n", "      <td>0.266480</td>\n", "      <td>0.699760</td>\n", "      <td>0.940892</td>\n", "      <td>0.068831</td>\n", "      <td>NaN</td>\n", "    </tr>\n",
"    <tr>\n", "      <th>B</th>\n", "      <td>0.249055</td>\n", "      <td>0.542753</td>\n", "      <td>0.606975</td>\n", "      <td>0.961807</td>\n", "      <td>0.598670</td>\n", "      <td>NaN</td>\n", "    </tr>\n",
"    <tr>\n", "      <th>C</th>\n", "      <td>NaN</td>\n", "      <td>NaN</td>\n", "      <td>NaN</td>\n", "      <td>NaN</td>\n", "      <td>NaN</td>\n", "      <td>NaN</td>\n", "    </tr>\n",
"    <tr>\n", "      <th>D</th>\n", "      <td>0.701571</td>\n", "      <td>0.971356</td>\n", "      <td>0.629673</td>\n", "      <td>0.319623</td>\n", "      <td>0.747501</td>\n", "      <td>NaN</td>\n", "    </tr>\n",
"    <tr>\n", "      <th>E</th>\n", "      <td>0.192507</td>\n", "      <td>0.125178</td>\n", "      <td>0.598555</td>\n", "      <td>0.254121</td>\n", "      <td>0.455577</td>\n", "      <td>NaN</td>\n", "    </tr>\n",
"  </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ "         c1        c2        c3        c4        c5  c6\n", "A  0.861122  0.266480  0.699760  0.940892  0.068831 NaN\n", "B  0.249055  0.542753  0.606975  0.961807  0.598670 NaN\n", "C       NaN       NaN       NaN       NaN       NaN NaN\n", "D  0.701571  0.971356  0.629673  0.319623  0.747501 NaN\n", "E  0.192507  0.125178  0.598555  0.254121  0.455577 NaN" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# `index` takes a flat list of row labels; a nested list would be read as a MultiIndex and match no rows.\n", "# Labels missing from df1 (row 'C', column 'c6') are filled with NaN.\n", "df1.reindex(index=['A', 'B', 'C', 'D', 'E'], columns=['c1', 'c2', 'c3', 'c4', 'c5', 'c6'])" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "C 3\n", "D 4\n", "dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s1.reindex(index=['C', 'D'])" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
c1c2c3c4c5
A0.3525910.4379140.1318920.2730350.463063
B0.3673520.2450980.0814780.7443480.990410
\n", "
" ], "text/plain": [ " c1 c2 c3 c4 c5\n", "A 0.352591 0.437914 0.131892 0.273035 0.463063\n", "B 0.367352 0.245098 0.081478 0.744348 0.990410" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.reindex(index=['A', 'B'])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "B 2\n", "C 3\n", "D 4\n", "dtype: int64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s1.drop('A')" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
c1c2c3c4c5
B0.3673520.2450980.0814780.7443480.990410
D0.3642440.7513670.7822570.4430110.688876
E0.6158510.7254090.7419320.7918030.576622
F0.5991750.5434320.4431250.4719290.436654
\n", "
" ], "text/plain": [ " c1 c2 c3 c4 c5\n", "B 0.367352 0.245098 0.081478 0.744348 0.990410\n", "D 0.364244 0.751367 0.782257 0.443011 0.688876\n", "E 0.615851 0.725409 0.741932 0.791803 0.576622\n", "F 0.599175 0.543432 0.443125 0.471929 0.436654" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# axis selects whether the label is dropped from the rows or the columns\n", "# 0 means rows\n", "# 1 means columns\n", "df1.drop('A', axis=0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }