{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 深入理解 Series 和 DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from pandas import Series, DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# 定义基础数据,后面的实例均基于此基础数据\n",
"data = {'Country': ['Belgium', 'India', 'Brazil'], \n",
" 'Capital': ['Brussels', 'New Delhi', 'Brasilia'], \n",
" 'Population': [11190846, 1303171035, 207847528]}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Sreies"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"A Belgium\n",
"B India\n",
"C Brazil\n",
"dtype: object"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s1 = pd.Series(data['Country'], index=['A', 'B', 'C'])\n",
"s1"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Belgium', 'India', 'Brazil'], dtype=object)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s1.values"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['A', 'B', 'C'], dtype='object')"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s1.index"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Country | \n",
" Capital | \n",
" Population | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Belgium | \n",
" Brussels | \n",
" 11190846 | \n",
"
\n",
" \n",
" 1 | \n",
" India | \n",
" New Delhi | \n",
" 1303171035 | \n",
"
\n",
" \n",
" 2 | \n",
" Brazil | \n",
" Brasilia | \n",
" 207847528 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Country Capital Population\n",
"0 Belgium Brussels 11190846\n",
"1 India New Delhi 1303171035\n",
"2 Brazil Brasilia 207847528"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1 = pd.DataFrame(data)\n",
"df1"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Belgium\n",
"1 India\n",
"2 Brazil\n",
"Name: Country, dtype: object"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1['Country']"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.series.Series"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(df1['Country'])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.iterrows()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"行:\n",
"(0, Country Belgium\n",
"Capital Brussels\n",
"Population 11190846\n",
"Name: 0, dtype: object)\n",
"行的类型:\n",
"\n",
"行的长度:\n",
"2\n",
"行的第一部分:\n",
"0\n",
"行的第二部分:\n",
"Country Belgium\n",
"Capital Brussels\n",
"Population 11190846\n",
"Name: 0, dtype: object\n",
"行的第一部分的类型:\n",
"\n",
"行的第二部分的类型:\n",
"\n"
]
}
],
"source": [
"for row in df1.iterrows():\n",
" print('行:'), print(row), \n",
" print('行的类型:'), print(type(row)), \n",
" print('行的长度:'), print(len(row)), \n",
" print('行的第一部分:'), print(row[0]), \n",
" print('行的第二部分:'), print(row[1]), \n",
" print('行的第一部分的类型:'), print(type(row[0])), \n",
" print('行的第二部分的类型:'), print(type(row[1]))\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" Capital | \n",
" Brussels | \n",
" New Delhi | \n",
" Brasilia | \n",
"
\n",
" \n",
" Country | \n",
" Belgium | \n",
" India | \n",
" Brazil | \n",
"
\n",
" \n",
" Population | \n",
" 11190846 | \n",
" 1303171035 | \n",
" 207847528 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2\n",
"Capital Brussels New Delhi Brasilia\n",
"Country Belgium India Brazil\n",
"Population 11190846 1303171035 207847528"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 从上面的结果可以看出 DataFrame 的行由 Series 组成,因此可以使用 Series 创建 DataFrame\n",
"s1 = pd.Series(data['Capital'])\n",
"s2 = pd.Series(data['Country'])\n",
"s3 = pd.Series(data['Population'])\n",
"df_new = pd.DataFrame([s1, s2, s3], index=['Capital', 'Country', 'Population'])\n",
"df_new"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"df_new = df_new.T"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Capital | \n",
" Country | \n",
" Population | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Brussels | \n",
" Belgium | \n",
" 11190846 | \n",
"
\n",
" \n",
" 1 | \n",
" New Delhi | \n",
" India | \n",
" 1303171035 | \n",
"
\n",
" \n",
" 2 | \n",
" Brasilia | \n",
" Brazil | \n",
" 207847528 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Capital Country Population\n",
"0 Brussels Belgium 11190846\n",
"1 New Delhi India 1303171035\n",
"2 Brasilia Brazil 207847528"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_new"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}