{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Concatenate 和 Combine" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Concatenate" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0, 1, 2],\n", " [3, 4, 5],\n", " [6, 7, 8]])" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "arr1 = np.arange(9).reshape(3, 3)\n", "arr2 = np.arange(9).reshape(3, 3)\n", "arr2" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0, 1, 2],\n", " [3, 4, 5],\n", " [6, 7, 8],\n", " [0, 1, 2],\n", " [3, 4, 5],\n", " [6, 7, 8]])" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.concatenate([arr1, arr2])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## concat" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "s1 = pd.Series([1, 2, 3], index=['X', 'Y', 'Z'])\n", "s2 = pd.Series([1, 2], index=['A', 'B'])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
ANaN1.0
BNaN2.0
X1.0NaN
Y2.0NaN
Z3.0NaN
\n", "
" ], "text/plain": [ " 0 1\n", "A NaN 1.0\n", "B NaN 2.0\n", "X 1.0 NaN\n", "Y 2.0 NaN\n", "Z 3.0 NaN" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([s1, s2], axis=1, sort=True)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "df1 = pd.DataFrame(np.random.randn(4, 3), columns=['X', 'Y', 'Z'])\n", "df2 = pd.DataFrame(np.random.randn(3, 3), columns=['X', 'Y', 'A'])" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AXYZ
0NaN-1.0418260.298051-0.701667
1NaN0.1885110.8959301.388733
2NaN0.897699-0.6969150.132036
3NaN-0.728917-0.2367221.331359
00.8682680.170405-0.517825NaN
1-0.697036-0.717582-0.138591NaN
21.1597731.046029-0.181497NaN
\n", "
" ], "text/plain": [ " A X Y Z\n", "0 NaN -1.041826 0.298051 -0.701667\n", "1 NaN 0.188511 0.895930 1.388733\n", "2 NaN 0.897699 -0.696915 0.132036\n", "3 NaN -0.728917 -0.236722 1.331359\n", "0 0.868268 0.170405 -0.517825 NaN\n", "1 -0.697036 -0.717582 -0.138591 NaN\n", "2 1.159773 1.046029 -0.181497 NaN" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([df1, df2],sort=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Combine" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "s1 = pd.Series([2, np.nan, 4, np.nan], index=['A', 'B', 'C', 'D'])\n", "s2 = pd.Series([1, 2, 3, 4], index=['A', 'B', 'C', 'D'])" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "A 2.0\n", "B 2.0\n", "C 4.0\n", "D 4.0\n", "dtype: float64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Fill the NaN entries of s1 with the values from s2 at the same index labels\n", "s1.combine_first(s2)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "df1 = pd.DataFrame({\n", " 'x': [1, np.nan, 3, np.nan], \n", " 'y': [5, np.nan, 7, np.nan], \n", " 'z': [9, np.nan, 11, np.nan], \n", "})\n", "df2 = pd.DataFrame({\n", " 'z': [np.nan, 10, np.nan, 12], \n", " 'a': [1, 2, 3, 4], \n", "})" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
axyz
01.01.05.09.0
12.0NaNNaN10.0
23.03.07.011.0
34.0NaNNaN12.0
\n", "
" ], "text/plain": [ " a x y z\n", "0 1.0 1.0 5.0 9.0\n", "1 2.0 NaN NaN 10.0\n", "2 3.0 3.0 7.0 11.0\n", "3 4.0 NaN NaN 12.0" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.combine_first(df2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }