{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# NaN - Not a Number"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from pandas import Series, DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"float"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"n = np.nan\n",
"type(n)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"nan"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 任何数值(整数、浮点数、0)与 NaN 作运算结果都是 NaN\n",
"m = 1\n",
"m + n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## NaN in Series"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"A 1.0\n",
"B 2.0\n",
"C NaN\n",
"D 3.0\n",
"E 4.0\n",
"dtype: float64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s1 = pd.Series([1, 2, np.nan, 3, 4], index=['A', 'B', 'C', 'D', 'E'])\n",
"s1"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"A False\n",
"B False\n",
"C True\n",
"D False\n",
"E False\n",
"dtype: bool"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 判断 Series 中的值是否是 NaN\n",
"s1.isnull()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"A True\n",
"B True\n",
"C False\n",
"D True\n",
"E True\n",
"dtype: bool"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 判断 Series 中的值是否不是 NaN\n",
"s1.notnull()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"A 1.0\n",
"B 2.0\n",
"D 3.0\n",
"E 4.0\n",
"dtype: float64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
" # 删除 Series 中的值是 NaN 的数据\n",
"s1.dropna()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Nan in DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1.0 | \n",
" 2.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" 1 | \n",
" NaN | \n",
" 5.0 | \n",
" 6.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 7.0 | \n",
" NaN | \n",
" 9.0 | \n",
"
\n",
" \n",
" 3 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2\n",
"0 1.0 2.0 3.0\n",
"1 NaN 5.0 6.0\n",
"2 7.0 NaN 9.0\n",
"3 NaN NaN NaN"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1 = pd.DataFrame([[1, 2, 3], [np.nan, 5, 6], [7, np.nan, 9], [np.nan, np.nan, np.nan]])\n",
"df1"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 1 | \n",
" True | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 2 | \n",
" False | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
" 3 | \n",
" True | \n",
" True | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2\n",
"0 False False False\n",
"1 True False False\n",
"2 False True False\n",
"3 True True True"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.isnull()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" True | \n",
" True | \n",
" True | \n",
"
\n",
" \n",
" 1 | \n",
" False | \n",
" True | \n",
" True | \n",
"
\n",
" \n",
" 2 | \n",
" True | \n",
" False | \n",
" True | \n",
"
\n",
" \n",
" 3 | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2\n",
"0 True True True\n",
"1 False True True\n",
"2 True False True\n",
"3 False False False"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.notnull()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1.0 | \n",
" 2.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2\n",
"0 1.0 2.0 3.0"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.dropna(axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
"
\n",
" \n",
" 3 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: [0, 1, 2, 3]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.dropna(axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1.0 | \n",
" 2.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" 1 | \n",
" NaN | \n",
" 5.0 | \n",
" 6.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 7.0 | \n",
" NaN | \n",
" 9.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2\n",
"0 1.0 2.0 3.0\n",
"1 NaN 5.0 6.0\n",
"2 7.0 NaN 9.0"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.dropna(how='all')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" NaN | \n",
" 5.0 | \n",
" 6.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 7.0 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2\n",
"0 NaN 5.0 6.0\n",
"1 7.0 NaN NaN\n",
"2 NaN NaN NaN"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2 = pd.DataFrame([[np.nan, 5, 6], [7, np.nan, np.nan], [np.nan, np.nan, np.nan]])\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" NaN | \n",
" 5.0 | \n",
" 6.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2\n",
"0 NaN 5.0 6.0"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# thresh 表示范围 删除少于 3 个非 NaN 值的行或列\n",
"df2.dropna(thresh=2)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1.0 | \n",
" 5.0 | \n",
" 6.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 7.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 1.0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2\n",
"0 1.0 5.0 6.0\n",
"1 7.0 1.0 1.0\n",
"2 1.0 1.0 1.0"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 替换值为 NaN 的值\n",
"df2.fillna(value=1)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.0 | \n",
" 5.0 | \n",
" 6.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 7.0 | \n",
" 1.0 | \n",
" 2.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 2.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2\n",
"0 0.0 5.0 6.0\n",
"1 7.0 1.0 2.0\n",
"2 0.0 1.0 2.0"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 按列替换值为 NaN 的值\n",
"df2.fillna(value={0:0, 1:1, 2:2})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}