{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 深入理解 Series 和 DataFrame" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from pandas import Series, DataFrame" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# 定义基础数据,后面的实例均基于此基础数据\n", "data = {'Country': ['Belgium', 'India', 'Brazil'], \n", " 'Capital': ['Brussels', 'New Delhi', 'Brasilia'], \n", " 'Population': [11190846, 1303171035, 207847528]}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Series" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "A Belgium\n", "B India\n", "C Brazil\n", "dtype: object" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s1 = pd.Series(data['Country'], index=['A', 'B', 'C'])\n", "s1" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['Belgium', 'India', 'Brazil'], dtype=object)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s1.values" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['A', 'B', 'C'], dtype='object')" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s1.index" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## DataFrame" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CountryCapitalPopulation
0BelgiumBrussels11190846
1IndiaNew Delhi1303171035
2BrazilBrasilia207847528
\n", "
" ], "text/plain": [ " Country Capital Population\n", "0 Belgium Brussels 11190846\n", "1 India New Delhi 1303171035\n", "2 Brazil Brasilia 207847528" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = pd.DataFrame(data)\n", "df1" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 Belgium\n", "1 India\n", "2 Brazil\n", "Name: Country, dtype: object" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1['Country']" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pandas.core.series.Series" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(df1['Country'])" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.iterrows()" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "行:\n", "(0, Country Belgium\n", "Capital Brussels\n", "Population 11190846\n", "Name: 0, dtype: object)\n", "行的类型:\n", "\n", "行的长度:\n", "2\n", "行的第一部分:\n", "0\n", "行的第二部分:\n", "Country Belgium\n", "Capital Brussels\n", "Population 11190846\n", "Name: 0, dtype: object\n", "行的第一部分的类型:\n", "\n", "行的第二部分的类型:\n", "\n" ] } ], "source": [ "for row in df1.iterrows():\n", " print('行:'), print(row), \n", " print('行的类型:'), print(type(row)), \n", " print('行的长度:'), print(len(row)), \n", " print('行的第一部分:'), print(row[0]), \n", " print('行的第二部分:'), print(row[1]), \n", " print('行的第一部分的类型:'), print(type(row[0])), \n", " print('行的第二部分的类型:'), print(type(row[1]))\n", " break" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
CapitalBrusselsNew DelhiBrasilia
CountryBelgiumIndiaBrazil
Population111908461303171035207847528
\n", "
" ], "text/plain": [ " 0 1 2\n", "Capital Brussels New Delhi Brasilia\n", "Country Belgium India Brazil\n", "Population 11190846 1303171035 207847528" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 从上面的结果可以看出 DataFrame 的行由 Series 组成,因此可以使用 Series 创建 DataFrame\n", "s1 = pd.Series(data['Capital'])\n", "s2 = pd.Series(data['Country'])\n", "s3 = pd.Series(data['Population'])\n", "df_new = pd.DataFrame([s1, s2, s3], index=['Capital', 'Country', 'Population'])\n", "df_new" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "df_new = df_new.T" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CapitalCountryPopulation
0BrusselsBelgium11190846
1New DelhiIndia1303171035
2BrasiliaBrazil207847528
\n", "
" ], "text/plain": [ " Capital Country Population\n", "0 Brussels Belgium 11190846\n", "1 New Delhi India 1303171035\n", "2 Brasilia Brazil 207847528" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_new" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }