{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Concatenate 和 Combine"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## NumPy: `np.concatenate`"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0, 1, 2],\n",
"       [3, 4, 5],\n",
"       [6, 7, 8]])"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build two separate 3x3 arrays, each holding the integers 0 through 8\n",
"arr1 = np.arange(0, 9).reshape((3, 3))\n",
"arr2 = np.arange(0, 9).reshape((3, 3))\n",
"arr2"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0, 1, 2],\n",
"       [3, 4, 5],\n",
"       [6, 7, 8],\n",
"       [0, 1, 2],\n",
"       [3, 4, 5],\n",
"       [6, 7, 8]])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Join the two arrays along the first axis (rows), giving a 6x3 result\n",
"np.concatenate((arr1, arr2), axis=0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## pandas: `pd.concat`"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Two integer Series whose index labels do not overlap\n",
"s1 = pd.Series(data=[1, 2, 3], index=list('XYZ'))\n",
"s2 = pd.Series(data=[1, 2], index=list('AB'))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
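"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>0</th>\n",
"      <th>1</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>A</th>\n",
"      <td>NaN</td>\n",
"      <td>1.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>B</th>\n",
"      <td>NaN</td>\n",
"      <td>2.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>X</th>\n",
"      <td>1.0</td>\n",
"      <td>NaN</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>Y</th>\n",
"      <td>2.0</td>\n",
"      <td>NaN</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>Z</th>\n",
"      <td>3.0</td>\n",
"      <td>NaN</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"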
],
"text/plain": [
"     0    1\n",
"A  NaN  1.0\n",
"B  NaN  2.0\n",
"X  1.0  NaN\n",
"Y  2.0  NaN\n",
"Z  3.0  NaN"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Put the two Series side by side as columns; the row index is the sorted\n",
"# union of their labels, with NaN wherever a Series has no such label\n",
"pd.concat(objs=[s1, s2], axis='columns', sort=True)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Random frames sharing columns X and Y; Z exists only in df1, A only in df2.\n",
"# No seed is set, so the values differ on every run.\n",
"df1 = pd.DataFrame(data=np.random.randn(4, 3), columns=list('XYZ'))\n",
"df2 = pd.DataFrame(data=np.random.randn(3, 3), columns=list('XYA'))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
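"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>A</th>\n",
"      <th>X</th>\n",
"      <th>Y</th>\n",
"      <th>Z</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>NaN</td>\n",
"      <td>-1.041826</td>\n",
"      <td>0.298051</td>\n",
"      <td>-0.701667</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>NaN</td>\n",
"      <td>0.188511</td>\n",
"      <td>0.895930</td>\n",
"      <td>1.388733</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>NaN</td>\n",
"      <td>0.897699</td>\n",
"      <td>-0.696915</td>\n",
"      <td>0.132036</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>3</th>\n",
"      <td>NaN</td>\n",
"      <td>-0.728917</td>\n",
"      <td>-0.236722</td>\n",
"      <td>1.331359</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>0.868268</td>\n",
"      <td>0.170405</td>\n",
"      <td>-0.517825</td>\n",
"      <td>NaN</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>-0.697036</td>\n",
"      <td>-0.717582</td>\n",
"      <td>-0.138591</td>\n",
"      <td>NaN</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>1.159773</td>\n",
"      <td>1.046029</td>\n",
"      <td>-0.181497</td>\n",
"      <td>NaN</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"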
],
"text/plain": [
"          A         X         Y         Z\n",
"0       NaN -1.041826  0.298051 -0.701667\n",
"1       NaN  0.188511  0.895930  1.388733\n",
"2       NaN  0.897699 -0.696915  0.132036\n",
"3       NaN -0.728917 -0.236722  1.331359\n",
"0  0.868268  0.170405 -0.517825       NaN\n",
"1 -0.697036 -0.717582 -0.138591       NaN\n",
"2  1.159773  1.046029 -0.181497       NaN"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Append df2's rows below df1's. Columns become the sorted union of both\n",
"# frames (A, X, Y, Z), with NaN where a frame lacks a column. The original\n",
"# row labels are kept, so 0, 1 and 2 appear twice in the result.\n",
"pd.concat(objs=[df1, df2], axis=0, sort=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## pandas: `combine_first`"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# s1 has gaps (NaN) at labels B and D; s2 has a value for every label\n",
"s1 = pd.Series(data=[2, np.nan, 4, np.nan], index=list('ABCD'))\n",
"s2 = pd.Series(data=[1, 2, 3, 4], index=list('ABCD'))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"A    2.0\n",
"B    2.0\n",
"C    4.0\n",
"D    4.0\n",
"dtype: float64"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep s1's values and fill its NaN entries from s2 at the same label\n",
"s1.combine_first(other=s2)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# df1: columns x, y, z, each with NaN in rows 1 and 3\n",
"df1 = pd.DataFrame(data=dict(\n",
"    x=[1, np.nan, 3, np.nan],\n",
"    y=[5, np.nan, 7, np.nan],\n",
"    z=[9, np.nan, 11, np.nan],\n",
"))\n",
"# df2: z has values exactly where df1's z is NaN; column a exists only here\n",
"df2 = pd.DataFrame(data=dict(\n",
"    z=[np.nan, 10, np.nan, 12],\n",
"    a=[1, 2, 3, 4],\n",
"))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
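"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>a</th>\n",
"      <th>x</th>\n",
"      <th>y</th>\n",
"      <th>z</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>1.0</td>\n",
"      <td>1.0</td>\n",
"      <td>5.0</td>\n",
"      <td>9.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>2.0</td>\n",
"      <td>NaN</td>\n",
"      <td>NaN</td>\n",
"      <td>10.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>3.0</td>\n",
"      <td>3.0</td>\n",
"      <td>7.0</td>\n",
"      <td>11.0</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>3</th>\n",
"      <td>4.0</td>\n",
"      <td>NaN</td>\n",
"      <td>NaN</td>\n",
"      <td>12.0</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"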
],
"text/plain": [
"     a    x    y     z\n",
"0  1.0  1.0  5.0   9.0\n",
"1  2.0  NaN  NaN  10.0\n",
"2  3.0  3.0  7.0  11.0\n",
"3  4.0  NaN  NaN  12.0"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fill df1's NaN cells from df2 where row and column labels match; the result\n",
"# has the union of both frames' columns (a comes entirely from df2)\n",
"df1.combine_first(other=df2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}