{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Series 和 DataFrame 的简单数学运算"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Series 的数学运算"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"s1 = pd.Series([1, 2, 3], index=['A', 'B', 'C'])\n",
"s2 = pd.Series([4, 5, 6, 7], index=['B','C', 'D', 'E'])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"A NaN\n",
"B 6.0\n",
"C 8.0\n",
"D NaN\n",
"E NaN\n",
"dtype: float64"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Series arithmetic pairs values by index label, not by position. A label that\n",
"# exists in only one operand ('A', 'D', 'E' here) yields NaN in the result.\n",
"s1.add(s2)  # method form of s1 + s2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## DataFrame 的数学运算"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"df1 = pd.DataFrame(np.arange(4).reshape(2, 2), index=['A', 'B'], columns=['BJ', 'SH'])\n",
"df2 = pd.DataFrame(np.arange(9).reshape(3, 3), index=['A', 'B', 'C'], columns=['BJ', 'SH', 'GZ'])"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" BJ | \n",
" GZ | \n",
" SH | \n",
"
\n",
" \n",
" \n",
" \n",
" A | \n",
" 0.0 | \n",
" NaN | \n",
" 2.0 | \n",
"
\n",
" \n",
" B | \n",
" 5.0 | \n",
" NaN | \n",
" 7.0 | \n",
"
\n",
" \n",
" C | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" BJ GZ SH\n",
"A 0.0 NaN 2.0\n",
"B 5.0 NaN 7.0\n",
"C NaN NaN NaN"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# DataFrame arithmetic aligns on both row and column labels. Any cell missing\n",
"# from either operand (row C and column GZ here) becomes NaN in the result.\n",
"df1.add(df2)  # method form of df1 + df2"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" c1 | \n",
" c2 | \n",
" c3 | \n",
"
\n",
" \n",
" \n",
" \n",
" A | \n",
" 1 | \n",
" 2 | \n",
" 3.0 | \n",
"
\n",
" \n",
" B | \n",
" 4 | \n",
" 5 | \n",
" NaN | \n",
"
\n",
" \n",
" C | \n",
" 7 | \n",
" 8 | \n",
" 9.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" c1 c2 c3\n",
"A 1 2 3.0\n",
"B 4 5 NaN\n",
"C 7 8 9.0"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3 = pd.DataFrame([[1, 2, 3], [4, 5, np.nan], [7, 8, 9]], index=['A', 'B', 'C'], columns=['c1', 'c2', 'c3'])\n",
"df3"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"A 6.0\n",
"B 9.0\n",
"C 24.0\n",
"dtype: float64"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sum down each column, skipping NaN; the result is a Series indexed by column name.\n",
"# Pass axis=1 instead to get one sum per row.\n",
"df3.sum(axis=0, skipna=True)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"c1 1.0\n",
"c2 2.0\n",
"c3 3.0\n",
"dtype: float64"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A cell only echoes its final expression, so each result is passed to display()\n",
"# explicitly; otherwise the minimum would be computed and silently discarded.\n",
"# Column-wise minimum, skipping NaN; returns a Series indexed by column name.\n",
"display(df3.min())  # pass axis=1 for the per-row minimum\n",
"# Column-wise maximum, skipping NaN; returns a Series indexed by column name.\n",
"display(df3.max())  # pass axis=1 for the per-row maximum"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" c1 | \n",
" c2 | \n",
" c3 | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 3.0 | \n",
" 3.0 | \n",
" 2.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 4.0 | \n",
" 5.0 | \n",
" 6.000000 | \n",
"
\n",
" \n",
" std | \n",
" 3.0 | \n",
" 3.0 | \n",
" 4.242641 | \n",
"
\n",
" \n",
" min | \n",
" 1.0 | \n",
" 2.0 | \n",
" 3.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 2.5 | \n",
" 3.5 | \n",
" 4.500000 | \n",
"
\n",
" \n",
" 50% | \n",
" 4.0 | \n",
" 5.0 | \n",
" 6.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 5.5 | \n",
" 6.5 | \n",
" 7.500000 | \n",
"
\n",
" \n",
" max | \n",
" 7.0 | \n",
" 8.0 | \n",
" 9.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" c1 c2 c3\n",
"count 3.0 3.0 2.000000\n",
"mean 4.0 5.0 6.000000\n",
"std 3.0 3.0 4.242641\n",
"min 1.0 2.0 3.000000\n",
"25% 2.5 3.5 4.500000\n",
"50% 4.0 5.0 6.000000\n",
"75% 5.5 6.5 7.500000\n",
"max 7.0 8.0 9.000000"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per-column summary statistics: count of non-NaN values, mean, std, min,\n",
"# the 25%/50%/75% percentiles, and max.\n",
"# Reference: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.describe.html\n",
"df3.describe(percentiles=[0.25, 0.5, 0.75])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}