{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Series 和 DataFrame 的简单数学运算" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Series 的数学运算" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "s1 = pd.Series([1, 2, 3], index=['A', 'B', 'C'])\n", "s2 = pd.Series([4, 5, 6, 7], index=['B','C', 'D', 'E'])" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "A NaN\n", "B 6.0\n", "C 8.0\n", "D NaN\n", "E NaN\n", "dtype: float64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Series 之间的运算会先按 index 对齐,再逐元素计算;只存在于其中一方的 index,结果为 NaN\n", "s1 + s2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## DataFrame 的数学运算" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "df1 = pd.DataFrame(np.arange(4).reshape(2, 2), index=['A', 'B'], columns=['BJ', 'SH'])\n", "df2 = pd.DataFrame(np.arange(9).reshape(3, 3), index=['A', 'B', 'C'], columns=['BJ', 'SH', 'GZ'])" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
BJGZSH
A0.0NaN2.0
B5.0NaN7.0
CNaNNaNNaN
\n", "
" ], "text/plain": [ " BJ GZ SH\n", "A 0.0 NaN 2.0\n", "B 5.0 NaN 7.0\n", "C NaN NaN NaN" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# DataFrame 之间的运算会同时按行 index 和列名对齐,再逐元素计算;任一方缺失的位置,结果为 NaN\n", "df1 + df2" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
c1c2c3
A123.0
B45NaN
C789.0
\n", "
" ], "text/plain": [ " c1 c2 c3\n", "A 1 2 3.0\n", "B 4 5 NaN\n", "C 7 8 9.0" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3 = pd.DataFrame([[1, 2, 3], [4, 5, np.nan], [7, 8, 9]], index=['A', 'B', 'C'], columns=['c1', 'c2', 'c3'])\n", "df3" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "A 6.0\n", "B 9.0\n", "C 24.0\n", "dtype: float64" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 计算每一列的和(NaN 会被忽略),返回一个 Series\n", "df3.sum() # 传入 axis=1 可改为计算每一行的和" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "c1 1.0\n", "c2 2.0\n", "c3 3.0\n", "dtype: float64" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 计算每一列的最小值(NaN 会被忽略),返回一个 Series\n", "df3.min() # 传入 axis=1 可改为计算每一行的最小值\n", "# 计算每一列的最大值(NaN 会被忽略),返回一个 Series\n", "df3.max() # 传入 axis=1 可改为计算每一行的最大值" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
c1c2c3
count3.03.02.000000
mean4.05.06.000000
std3.03.04.242641
min1.02.03.000000
25%2.53.54.500000
50%4.05.06.000000
75%5.56.57.500000
max7.08.09.000000
\n", "
" ], "text/plain": [ " c1 c2 c3\n", "count 3.0 3.0 2.000000\n", "mean 4.0 5.0 6.000000\n", "std 3.0 3.0 4.242641\n", "min 1.0 2.0 3.000000\n", "25% 2.5 3.5 4.500000\n", "50% 4.0 5.0 6.000000\n", "75% 5.5 6.5 7.500000\n", "max 7.0 8.0 9.000000" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 按列生成描述性统计(count、mean、std、min、分位数、max),参考 https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.describe.html\n", "df3.describe()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }