{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# NaN - Not a Number" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from pandas import Series, DataFrame" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "float" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "n = np.nan\n", "type(n)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "nan" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 任何数值(整数、浮点数、0)与 NaN 作运算结果都是 NaN\n", "m = 1\n", "m + n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## NaN in Series" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "A 1.0\n", "B 2.0\n", "C NaN\n", "D 3.0\n", "E 4.0\n", "dtype: float64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s1 = pd.Series([1, 2, np.nan, 3, 4], index=['A', 'B', 'C', 'D', 'E'])\n", "s1" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "A False\n", "B False\n", "C True\n", "D False\n", "E False\n", "dtype: bool" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 判断 Series 中的值是否是 NaN\n", "s1.isnull()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "A True\n", "B True\n", "C False\n", "D True\n", "E True\n", "dtype: bool" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 判断 Series 中的值是否不是 NaN\n", "s1.notnull()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "A 1.0\n", "B 2.0\n", "D 3.0\n", "E 
4.0\n", "dtype: float64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Drop the entries of the Series whose value is NaN\n", "s1.dropna()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## NaN in DataFrame" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
01.02.03.0
1NaN5.06.0
27.0NaN9.0
3NaNNaNNaN
\n", "
" ], "text/plain": [ " 0 1 2\n", "0 1.0 2.0 3.0\n", "1 NaN 5.0 6.0\n", "2 7.0 NaN 9.0\n", "3 NaN NaN NaN" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = pd.DataFrame([[1, 2, 3], [np.nan, 5, 6], [7, np.nan, 9], [np.nan, np.nan, np.nan]])\n", "df1" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0FalseFalseFalse
1TrueFalseFalse
2FalseTrueFalse
3TrueTrueTrue
\n", "
" ], "text/plain": [ " 0 1 2\n", "0 False False False\n", "1 True False False\n", "2 False True False\n", "3 True True True" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.isnull()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0TrueTrueTrue
1FalseTrueTrue
2TrueFalseTrue
3FalseFalseFalse
\n", "
" ], "text/plain": [ " 0 1 2\n", "0 True True True\n", "1 False True True\n", "2 True False True\n", "3 False False False" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.notnull()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
01.02.03.0
\n", "
" ], "text/plain": [ " 0 1 2\n", "0 1.0 2.0 3.0" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.dropna(axis=0)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
1
2
3
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: []\n", "Index: [0, 1, 2, 3]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.dropna(axis=1)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
01.02.03.0
1NaN5.06.0
27.0NaN9.0
\n", "
" ], "text/plain": [ " 0 1 2\n", "0 1.0 2.0 3.0\n", "1 NaN 5.0 6.0\n", "2 7.0 NaN 9.0" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.dropna(how='all')" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0NaN5.06.0
17.0NaNNaN
2NaNNaNNaN
\n", "
" ], "text/plain": [ " 0 1 2\n", "0 NaN 5.0 6.0\n", "1 7.0 NaN NaN\n", "2 NaN NaN NaN" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = pd.DataFrame([[np.nan, 5, 6], [7, np.nan, np.nan], [np.nan, np.nan, np.nan]])\n", "df2" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0NaN5.06.0
\n", "
" ], "text/plain": [ " 0 1 2\n", "0 NaN 5.0 6.0" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# thresh 表示范围 删除少于 3 个非 NaN 值的行或列\n", "df2.dropna(thresh=2)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
01.05.06.0
17.01.01.0
21.01.01.0
\n", "
" ], "text/plain": [ " 0 1 2\n", "0 1.0 5.0 6.0\n", "1 7.0 1.0 1.0\n", "2 1.0 1.0 1.0" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 替换值为 NaN 的值\n", "df2.fillna(value=1)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
00.05.06.0
17.01.02.0
20.01.02.0
\n", "
" ], "text/plain": [ " 0 1 2\n", "0 0.0 5.0 6.0\n", "1 7.0 1.0 2.0\n", "2 0.0 1.0 2.0" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 按列替换值为 NaN 的值\n", "df2.fillna(value={0:0, 1:1, 2:2})" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }