{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e91239ad",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5803418b",
"metadata": {},
"outputs": [],
"source": [
"# Training set for the k-NN demo, stored column-wise (one list per column).\n",
"# The three shot-count columns are the features; \"电影类型\" is the class label.\n",
"# Every list must have the same length, otherwise pd.DataFrame(rowdata) fails.\n",
"# NOTE(review): '功夫熊猫' appears twice (positions 0 and 6) with identical values,\n",
"# so it is counted twice among the neighbours -- confirm this is intended.\n",
"rowdata = {\n",
"    \"电影名称\": ['功夫熊猫', '叶问3', '伦敦陷落', '代理情人', '新步步惊心', '谍影重重', '功夫熊猫', '美人鱼', '宝贝当家'],\n",
"    \"搞笑镜头\": [39, 3, 2, 9, 8, 5, 39, 21, 45],\n",
"    \"拥抱镜头\": [0, 2, 3, 38, 34, 2, 0, 17, 2],\n",
"    \"打斗镜头\": [31, 65, 55, 2, 17, 57, 31, 5, 9],\n",
"    \"电影类型\": [\"喜剧片\", \"动作片\", \"动作片\", \"爱情片\", \"爱情片\", \"动作片\", \"喜剧片\", \"喜剧片\", \"喜剧片\"]\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e1d2a6c4",
"metadata": {},
"outputs": [],
"source": [
"# Build the training table: each key of rowdata becomes one column.\n",
"movie_data = pd.DataFrame(data=rowdata)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "7e0058cc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 电影名称 | \n",
" 搞笑镜头 | \n",
" 拥抱镜头 | \n",
" 打斗镜头 | \n",
" 电影类型 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 功夫熊猫 | \n",
" 39 | \n",
" 0 | \n",
" 31 | \n",
" 喜剧片 | \n",
"
\n",
" \n",
" 1 | \n",
" 叶问3 | \n",
" 3 | \n",
" 2 | \n",
" 65 | \n",
" 动作片 | \n",
"
\n",
" \n",
" 2 | \n",
" 伦敦陷落 | \n",
" 2 | \n",
" 3 | \n",
" 55 | \n",
" 动作片 | \n",
"
\n",
" \n",
" 3 | \n",
" 代理情人 | \n",
" 9 | \n",
" 38 | \n",
" 2 | \n",
" 爱情片 | \n",
"
\n",
" \n",
" 4 | \n",
" 新步步惊心 | \n",
" 8 | \n",
" 34 | \n",
" 17 | \n",
" 爱情片 | \n",
"
\n",
" \n",
" 5 | \n",
" 谍影重重 | \n",
" 5 | \n",
" 2 | \n",
" 57 | \n",
" 动作片 | \n",
"
\n",
" \n",
" 6 | \n",
" 功夫熊猫 | \n",
" 39 | \n",
" 0 | \n",
" 31 | \n",
" 喜剧片 | \n",
"
\n",
" \n",
" 7 | \n",
" 美人鱼 | \n",
" 21 | \n",
" 17 | \n",
" 5 | \n",
" 喜剧片 | \n",
"
\n",
" \n",
" 8 | \n",
" 宝贝当家 | \n",
" 45 | \n",
" 2 | \n",
" 9 | \n",
" 喜剧片 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 电影名称 搞笑镜头 拥抱镜头 打斗镜头 电影类型\n",
"0 功夫熊猫 39 0 31 喜剧片\n",
"1 叶问3 3 2 65 动作片\n",
"2 伦敦陷落 2 3 55 动作片\n",
"3 代理情人 9 38 2 爱情片\n",
"4 新步步惊心 8 34 17 爱情片\n",
"5 谍影重重 5 2 57 动作片\n",
"6 功夫熊猫 39 0 31 喜剧片\n",
"7 美人鱼 21 17 5 喜剧片\n",
"8 宝贝当家 45 2 9 喜剧片"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"movie_data"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "0b06ed25",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 21.470911\n",
"1 52.009614\n",
"2 43.416587\n",
"3 40.570926\n",
"4 34.438351\n",
"5 43.874822\n",
"6 21.470911\n",
"7 18.547237\n",
"8 23.430749\n",
"dtype: float64"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Query movie to classify, given as [搞笑镜头, 拥抱镜头, 打斗镜头] shot counts.\n",
"new_data = [23, 3, 17]\n",
"# Euclidean distance from every training row to the query.\n",
"# Feature columns are selected by name and all rows are used, so this cell does\n",
"# not depend on the table having exactly 9 rows or on column positions.\n",
"# The list is subtracted column by column, in the order the columns are listed.\n",
"data = ((movie_data[[\"搞笑镜头\", \"拥抱镜头\", \"打斗镜头\"]] - new_data) ** 2).sum(axis=1) ** 0.5\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "59e75138",
"metadata": {},
"outputs": [],
"source": [
"# Number of nearest neighbours that vote on the predicted genre.\n",
"k = 5\n",
"# Pair each training row's distance to the query with that row's genre label.\n",
"# The label column is taken by name for all rows (no hard-coded row count or\n",
"# column position); pandas aligns the two Series on their shared row index.\n",
"data_l = pd.DataFrame({\n",
"    \"data\": data,\n",
"    \"labels\": movie_data[\"电影类型\"],\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "0092d0e3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" data | \n",
" labels | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 21.470911 | \n",
" 喜剧片 | \n",
"
\n",
" \n",
" 1 | \n",
" 52.009614 | \n",
" 动作片 | \n",
"
\n",
" \n",
" 2 | \n",
" 43.416587 | \n",
" 动作片 | \n",
"
\n",
" \n",
" 3 | \n",
" 40.570926 | \n",
" 爱情片 | \n",
"
\n",
" \n",
" 4 | \n",
" 34.438351 | \n",
" 爱情片 | \n",
"
\n",
" \n",
" 5 | \n",
" 43.874822 | \n",
" 动作片 | \n",
"
\n",
" \n",
" 6 | \n",
" 21.470911 | \n",
" 喜剧片 | \n",
"
\n",
" \n",
" 7 | \n",
" 18.547237 | \n",
" 喜剧片 | \n",
"
\n",
" \n",
" 8 | \n",
" 23.430749 | \n",
" 喜剧片 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" data labels\n",
"0 21.470911 喜剧片\n",
"1 52.009614 动作片\n",
"2 43.416587 动作片\n",
"3 40.570926 爱情片\n",
"4 34.438351 爱情片\n",
"5 43.874822 动作片\n",
"6 21.470911 喜剧片\n",
"7 18.547237 喜剧片\n",
"8 23.430749 喜剧片"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_l"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "e448a633",
"metadata": {},
"outputs": [],
"source": [
"# Order the training rows from nearest to farthest from the query.\n",
"sort_data = data_l.sort_values(\"data\", ascending=True)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "59fc7986",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" data | \n",
" labels | \n",
"
\n",
" \n",
" \n",
" \n",
" 7 | \n",
" 18.547237 | \n",
" 喜剧片 | \n",
"
\n",
" \n",
" 0 | \n",
" 21.470911 | \n",
" 喜剧片 | \n",
"
\n",
" \n",
" 6 | \n",
" 21.470911 | \n",
" 喜剧片 | \n",
"
\n",
" \n",
" 8 | \n",
" 23.430749 | \n",
" 喜剧片 | \n",
"
\n",
" \n",
" 4 | \n",
" 34.438351 | \n",
" 爱情片 | \n",
"
\n",
" \n",
" 3 | \n",
" 40.570926 | \n",
" 爱情片 | \n",
"
\n",
" \n",
" 2 | \n",
" 43.416587 | \n",
" 动作片 | \n",
"
\n",
" \n",
" 5 | \n",
" 43.874822 | \n",
" 动作片 | \n",
"
\n",
" \n",
" 1 | \n",
" 52.009614 | \n",
" 动作片 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" data labels\n",
"7 18.547237 喜剧片\n",
"0 21.470911 喜剧片\n",
"6 21.470911 喜剧片\n",
"8 23.430749 喜剧片\n",
"4 34.438351 爱情片\n",
"3 40.570926 爱情片\n",
"2 43.416587 动作片\n",
"5 43.874822 动作片\n",
"1 52.009614 动作片"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sort_data"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "c424e45d",
"metadata": {},
"outputs": [],
"source": [
"# Keep only the first k rows of the distance-sorted table (positional slice).\n",
"select_k = sort_data.iloc[:k]"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "b1b4d542",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" data | \n",
" labels | \n",
"
\n",
" \n",
" \n",
" \n",
" 7 | \n",
" 18.547237 | \n",
" 喜剧片 | \n",
"
\n",
" \n",
" 0 | \n",
" 21.470911 | \n",
" 喜剧片 | \n",
"
\n",
" \n",
" 6 | \n",
" 21.470911 | \n",
" 喜剧片 | \n",
"
\n",
" \n",
" 8 | \n",
" 23.430749 | \n",
" 喜剧片 | \n",
"
\n",
" \n",
" 4 | \n",
" 34.438351 | \n",
" 爱情片 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" data labels\n",
"7 18.547237 喜剧片\n",
"0 21.470911 喜剧片\n",
"6 21.470911 喜剧片\n",
"8 23.430749 喜剧片\n",
"4 34.438351 爱情片"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"select_k"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "aa754a73",
"metadata": {},
"outputs": [],
"source": [
"# Genre labels of the k selected neighbours.\n",
"frequency1 = select_k[\"labels\"]"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "0a4855f3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"7 喜剧片\n",
"0 喜剧片\n",
"6 喜剧片\n",
"8 喜剧片\n",
"4 爱情片\n",
"Name: labels, dtype: object"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frequency1"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "d2a8eda4",
"metadata": {},
"outputs": [],
"source": [
"# Count the votes per genre, most frequent genre first.\n",
"result = frequency1.value_counts(sort=True, ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "905e2b9e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"喜剧片 4\n",
"爱情片 1\n",
"Name: labels, dtype: int64"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "75b0b58c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'喜剧片'"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Predicted genre = the label with the highest vote count among the k neighbours.\n",
"# idxmax() picks the maximum directly instead of relying on the row order of result.\n",
"result.idxmax()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d463a29",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}