{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Reindexing Series&DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from pandas import Series, DataFrame"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Series reindex"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"A 1\n",
"B 2\n",
"C 3\n",
"D 4\n",
"dtype: int64"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s1 = pd.Series([1, 2, 3, 4], index=['A', 'B', 'C', 'D'])\n",
"s1"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"A 1.0\n",
"B 2.0\n",
"C 3.0\n",
"D 4.0\n",
"E NaN\n",
"dtype: float64"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 按住 Shift + Tab 可以查看帮助\n",
"s1.reindex(index=['A', 'B', 'C', 'D', 'E'])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"A 1\n",
"B 2\n",
"C 3\n",
"D 4\n",
"E 10\n",
"dtype: int64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s1.reindex(index=['A', 'B', 'C', 'D', 'E'], fill_value=10)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1 A\n",
"3 B\n",
"5 C\n",
"dtype: object"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s2 = pd.Series(['A', 'B', 'C'], index=[1, 3, 5])\n",
"s2"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 NaN\n",
"1 A\n",
"2 NaN\n",
"3 B\n",
"4 NaN\n",
"5 C\n",
"6 NaN\n",
"dtype: object"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s2.reindex(index=range(7))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 NaN\n",
"1 A\n",
"2 A\n",
"3 B\n",
"4 B\n",
"5 C\n",
"6 C\n",
"dtype: object"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# method='ffill' 表示根据已有的值自动填充为 NaN 的值\n",
"s2.reindex(index=range(7), method='ffill')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## DataFrame reindex"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" c1 | \n",
" c2 | \n",
" c3 | \n",
" c4 | \n",
" c5 | \n",
"
\n",
" \n",
" \n",
" \n",
" A | \n",
" 0.861122 | \n",
" 0.266480 | \n",
" 0.699760 | \n",
" 0.940892 | \n",
" 0.068831 | \n",
"
\n",
" \n",
" B | \n",
" 0.249055 | \n",
" 0.542753 | \n",
" 0.606975 | \n",
" 0.961807 | \n",
" 0.598670 | \n",
"
\n",
" \n",
" D | \n",
" 0.701571 | \n",
" 0.971356 | \n",
" 0.629673 | \n",
" 0.319623 | \n",
" 0.747501 | \n",
"
\n",
" \n",
" E | \n",
" 0.192507 | \n",
" 0.125178 | \n",
" 0.598555 | \n",
" 0.254121 | \n",
" 0.455577 | \n",
"
\n",
" \n",
" F | \n",
" 0.982085 | \n",
" 0.521312 | \n",
" 0.906348 | \n",
" 0.912376 | \n",
" 0.352556 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" c1 c2 c3 c4 c5\n",
"A 0.861122 0.266480 0.699760 0.940892 0.068831\n",
"B 0.249055 0.542753 0.606975 0.961807 0.598670\n",
"D 0.701571 0.971356 0.629673 0.319623 0.747501\n",
"E 0.192507 0.125178 0.598555 0.254121 0.455577\n",
"F 0.982085 0.521312 0.906348 0.912376 0.352556"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1 = pd.DataFrame(np.random.rand(25).reshape([5, 5]), index=['A', 'B', 'D', 'E', 'F'], columns=['c1', 'c2', 'c3', 'c4', 'c5'])\n",
"df1"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" c1 | \n",
" c2 | \n",
" c3 | \n",
" c4 | \n",
" c5 | \n",
" c6 | \n",
"
\n",
" \n",
" \n",
" \n",
" A | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" B | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" C | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" D | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" E | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" c1 c2 c3 c4 c5 c6\n",
"A NaN NaN NaN NaN NaN NaN\n",
"B NaN NaN NaN NaN NaN NaN\n",
"C NaN NaN NaN NaN NaN NaN\n",
"D NaN NaN NaN NaN NaN NaN\n",
"E NaN NaN NaN NaN NaN NaN"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.reindex(index=[['A', 'B', 'C', 'D', 'E']], columns=['c1', 'c2', 'c3', 'c4', 'c5', 'c6'])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"C 3\n",
"D 4\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s1.reindex(index=['C', 'D'])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" c1 | \n",
" c2 | \n",
" c3 | \n",
" c4 | \n",
" c5 | \n",
"
\n",
" \n",
" \n",
" \n",
" A | \n",
" 0.352591 | \n",
" 0.437914 | \n",
" 0.131892 | \n",
" 0.273035 | \n",
" 0.463063 | \n",
"
\n",
" \n",
" B | \n",
" 0.367352 | \n",
" 0.245098 | \n",
" 0.081478 | \n",
" 0.744348 | \n",
" 0.990410 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" c1 c2 c3 c4 c5\n",
"A 0.352591 0.437914 0.131892 0.273035 0.463063\n",
"B 0.367352 0.245098 0.081478 0.744348 0.990410"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.reindex(index=['A', 'B'])"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"B 2\n",
"C 3\n",
"D 4\n",
"dtype: int64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s1.drop('A')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" c1 | \n",
" c2 | \n",
" c3 | \n",
" c4 | \n",
" c5 | \n",
"
\n",
" \n",
" \n",
" \n",
" B | \n",
" 0.367352 | \n",
" 0.245098 | \n",
" 0.081478 | \n",
" 0.744348 | \n",
" 0.990410 | \n",
"
\n",
" \n",
" D | \n",
" 0.364244 | \n",
" 0.751367 | \n",
" 0.782257 | \n",
" 0.443011 | \n",
" 0.688876 | \n",
"
\n",
" \n",
" E | \n",
" 0.615851 | \n",
" 0.725409 | \n",
" 0.741932 | \n",
" 0.791803 | \n",
" 0.576622 | \n",
"
\n",
" \n",
" F | \n",
" 0.599175 | \n",
" 0.543432 | \n",
" 0.443125 | \n",
" 0.471929 | \n",
" 0.436654 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" c1 c2 c3 c4 c5\n",
"B 0.367352 0.245098 0.081478 0.744348 0.990410\n",
"D 0.364244 0.751367 0.782257 0.443011 0.688876\n",
"E 0.615851 0.725409 0.741932 0.791803 0.576622\n",
"F 0.599175 0.543432 0.443125 0.471929 0.436654"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# axis 表示行和列\n",
"# 0 表示行\n",
"# 1 表示列\n",
"df1.drop('A', axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}