{"id":3015,"date":"2017-12-26T10:04:55","date_gmt":"2017-12-26T01:04:55","guid":{"rendered":"http:\/\/www.ois-yokohama.co.jp\/oisblog\/?p=3015"},"modified":"2022-09-28T11:44:55","modified_gmt":"2022-09-28T02:44:55","slug":"kaggle%e3%83%81%e3%83%a5%e3%83%bc%e3%83%88%e3%83%aa%e3%82%a2%e3%83%ab","status":"publish","type":"post","link":"https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/archives\/3015","title":{"rendered":"Kaggle\u30c1\u30e5\u30fc\u30c8\u30ea\u30a2\u30eb"},"content":{"rendered":"<div class=\"veu_autoEyeCatchBox\"><img loading=\"lazy\" decoding=\"async\" width=\"750\" height=\"500\" src=\"https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/wp-content\/uploads\/2017\/11\/pic_c074-1024x683.jpg\" class=\"attachment-large size-large wp-post-image\" alt=\"\" srcset=\"https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/wp-content\/uploads\/2017\/11\/pic_c074-1024x683.jpg 1024w, https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/wp-content\/uploads\/2017\/11\/pic_c074-300x200.jpg 300w, https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/wp-content\/uploads\/2017\/11\/pic_c074-768x512.jpg 768w, https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/wp-content\/uploads\/2017\/11\/pic_c074.jpg 1280w\" sizes=\"auto, (max-width: 750px) 100vw, 750px\" \/><\/div>\n<h3>Table of Contents<\/h3>\n<div id=\"content\">\n<div id=\"table-of-contents\">\n<div id=\"text-table-of-contents\">\n<ul>\n<li><a href=\"#sec-1\">1. Kaggle\u3068\u306f<\/a><\/li>\n<li><a href=\"#sec-2\">2. Titanic\u30c1\u30e5\u30fc\u30c8\u30ea\u30a2\u30eb<\/a><\/li>\n<li><a href=\"#sec-3\">3. \u74b0\u5883<\/a><\/li>\n<li><a href=\"#sec-4\">4. \u30c7\u30fc\u30bf\u8aad\u307f\u8fbc\u307f<\/a><\/li>\n<li><a href=\"#sec-5\">5. \u6b20\u640d\u30c7\u30fc\u30bf\u306e\u88dc\u5b8c<\/a><\/li>\n<li><a href=\"#sec-6\">6. \u30c0\u30df\u30fc\u5909\u6570\u5316<\/a><\/li>\n<li><a href=\"#sec-7\">7. \u30e2\u30c7\u30eb\u751f\u6210\u3068\u5b66\u7fd2<\/a><\/li>\n<li><a href=\"#sec-8\">8. \u4e88\u6e2c<\/a><\/li>\n<li><a href=\"#sec-9\">9. 
\u7d50\u679c\u3092\u30d5\u30a1\u30a4\u30eb\u51fa\u529b<\/a><\/li>\n<\/ul>\n<\/div>\n<\/div>\n<p>\u6a5f\u68b0\u5b66\u7fd2\u306e\u52c9\u5f37\u306e\u305f\u3081\u3001Kaggle\u306e\u30c1\u30e5\u30fc\u30c8\u30ea\u30a2\u30eb\u3092\u8a66\u3057\u3066\u307f\u307e\u3057\u305f\u3002<\/p>\n<p>&nbsp;<\/p>\n<div id=\"outline-container-sec-1\" class=\"outline-2\">\n<h3 id=\"sec-1\"><span class=\"section-number-2\">1<\/span> Kaggle\u3068\u306f<\/h3>\n<div id=\"text-1\" class=\"outline-text-2\">Kaggle\u306f\u30c7\u30fc\u30bf\u5206\u6790\u306eCompetition\u30b5\u30a4\u30c8\u3067\u3001\u4f01\u696d\u3084\u7814\u7a76\u8005\u304c\u6295\u7a3f\u3057\u305f\u30c6\u30fc\u30de\u3092\u69d8\u3005\u306a\u30c7\u30fc\u30bf\u5206\u6790\u624b\u6cd5\u3092\u99c6\u4f7f\u3057\u3066\u3001\u4e88\u6e2c\u30e2\u30c7\u30eb\u306e\u7cbe\u5ea6\u3092\u7af6\u3046\u5834\u6240\u306b\u306a\u308a\u307e\u3059\u3002<br \/>\ntrain\u30c7\u30fc\u30bf\u3068test\u30c7\u30fc\u30bf\u304c\u30b5\u30a4\u30c8\u4e0a\u304b\u3089\u6e21\u3055\u308c\u3001train\u30c7\u30fc\u30bf\u3092\u3082\u3068\u306b\u30e2\u30c7\u30ea\u30f3\u30b0\u3057\u3001test\u30c7\u30fc\u30bf\u3067\u4e88\u6e2c\u3057\u6295\u7a3f\u3059\u308b\u3001\u3068\u3044\u3046\u624b\u9806\u306b\u306a\u308a\u307e\u3059\u3002<br \/>\nKaggle\u306b\u306f\u521d\u5fc3\u8005\u7528\u306e\u30c1\u30e5\u30fc\u30c8\u30ea\u30a2\u30eb\u3082\u7528\u610f\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/div>\n<div><\/div>\n<\/div>\n<div id=\"outline-container-sec-2\" class=\"outline-2\">\n<h3 id=\"sec-2\"><span class=\"section-number-2\">2<\/span> Titanic\u30c1\u30e5\u30fc\u30c8\u30ea\u30a2\u30eb<\/h3>\n<div id=\"text-2\" class=\"outline-text-2\">\u4eca\u56de\u8a66\u3057\u305f\u30c1\u30e5\u30fc\u30c8\u30ea\u30a2\u30eb\u306f\u3001\u6709\u540d\u306a\u30bf\u30a4\u30bf\u30cb\u30c3\u30af\u53f7\u6c88\u6ca1\u4e8b\u6545\u3092\u984c\u6750\u306b\u3057\u305f\u3082\u306e\u3067\u3001<br 
\/>\n\u4e57\u5ba2\u306e\u5e74\u9f62\u3001\u6027\u5225\u3001\u793e\u4f1a\u968e\u7d1a\u3001\u306a\u3069\u306e\u30c7\u30fc\u30bf\u304b\u3089\u751f\u6b7b\u3092\u4e88\u6e2c\u3059\u308b\u3053\u3068\u304c\u8ab2\u984c\u3068\u306a\u308a\u307e\u3059\u3002<br \/>\n\u30c7\u30fc\u30bf\u30fb\u30bb\u30c3\u30c8\u306f\u4ee5\u4e0b\u306e\u901a\u308a\u3002&nbsp;<\/p>\n<table style=\"width: 76.1347%;\" border=\"2\" frame=\"hsides\" rules=\"groups\" cellspacing=\"0\" cellpadding=\"6\">\n<colgroup>\n<col class=\"left\" \/>\n<col class=\"left\" \/> <\/colgroup>\n<tbody>\n<tr>\n<td class=\"left\" style=\"width: 32.2314%;\">PassengerID<\/td>\n<td class=\"left\" style=\"width: 42.5039%;\">\u4e57\u5ba2ID<\/td>\n<\/tr>\n<tr>\n<td class=\"left\" style=\"width: 32.2314%;\">survival<\/td>\n<td class=\"left\" style=\"width: 42.5039%;\">\u751f\u6b7b(0 = No; 1 = Yes)<\/td>\n<\/tr>\n<tr>\n<td class=\"left\" style=\"width: 32.2314%;\">pclass<\/td>\n<td class=\"left\" style=\"width: 42.5039%;\">\u4e57\u5ba2\u306e\u793e\u4f1a\u968e\u7d1a<\/td>\n<\/tr>\n<tr>\n<td class=\"left\" style=\"width: 32.2314%;\">name<\/td>\n<td class=\"left\" style=\"width: 42.5039%;\">Name<\/td>\n<\/tr>\n<tr>\n<td class=\"left\" style=\"width: 32.2314%;\">sex<\/td>\n<td class=\"left\" style=\"width: 42.5039%;\">\u6027\u5225<\/td>\n<\/tr>\n<tr>\n<td class=\"left\" style=\"width: 32.2314%;\">age<\/td>\n<td class=\"left\" style=\"width: 42.5039%;\">\u5e74\u9f62<\/td>\n<\/tr>\n<tr>\n<td class=\"left\" style=\"width: 32.2314%;\">sibsp<\/td>\n<td class=\"left\" style=\"width: 42.5039%;\">\u4e57\u8239\u3057\u3066\u3044\u308b\u592b\u5a66\u3001\u5144\u5f1f\u59c9\u59b9\u306e\u6570<\/td>\n<\/tr>\n<tr>\n<td class=\"left\" style=\"width: 32.2314%;\">parch<\/td>\n<td class=\"left\" style=\"width: 42.5039%;\">\u4e57\u8239\u3057\u3066\u3044\u308b\u89aa\u3001\u5b50\u4f9b\u306e\u6570<\/td>\n<\/tr>\n<tr>\n<td class=\"left\" style=\"width: 32.2314%;\">ticket<\/td>\n<td class=\"left\" style=\"width: 
42.5039%;\">\u30c1\u30b1\u30c3\u30c8No<\/td>\n<\/tr>\n<tr>\n<td class=\"left\" style=\"width: 32.2314%;\">fare<\/td>\n<td class=\"left\" style=\"width: 42.5039%;\">\u4e57\u8239\u6599\u91d1<\/td>\n<\/tr>\n<tr>\n<td class=\"left\" style=\"width: 32.2314%;\">cabin<\/td>\n<td class=\"left\" style=\"width: 42.5039%;\">\u8239\u5ba4<\/td>\n<\/tr>\n<tr>\n<td class=\"left\" style=\"width: 32.2314%;\">embarked<\/td>\n<td class=\"left\" style=\"width: 42.5039%;\">\u4e57\u8239\u5834\u6240<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<p>survival\u306f\u3001train\u30c7\u30fc\u30bf\u306e\u307f\u306b\u5b58\u5728\u3002\u4e0a\u8a18\u30c7\u30fc\u30bf\u304b\u3089test\u30c7\u30fc\u30bf\u306esurvival\u3092\u4e88\u6e2c\u3059\u308b\u3002<\/p>\n<p>&nbsp;<\/p>\n<\/div>\n<\/div>\n<div id=\"outline-container-sec-3\" class=\"outline-2\">\n<h3 id=\"sec-3\"><span class=\"section-number-2\">3<\/span> \u74b0\u5883<\/h3>\n<div id=\"text-3\" class=\"outline-text-2\">\u8a00\u8a9e: Python3.6.2(anaconda)<\/p>\n<ul class=\"org-ul\">\n<li>anaconda\u306fpython\u306e\u30c7\u30fc\u30bf\u5206\u6790\u7cfb\u306e\u30e9\u30a4\u30d6\u30e9\u30ea\u304c\u30bb\u30c3\u30c8\u3068\u306a\u3063\u3066\u3044\u308b\u30d1\u30c3\u30b1\u30fc\u30b8<\/li>\n<\/ul>\n<p>\u74b0\u5883: jupyter notebook<\/p>\n<ul class=\"org-ul\">\n<li>\u30d7\u30ed\u30b0\u30e9\u30e0\u306e\u8a18\u8ff0\u3001\u5b9f\u884c\u3001\u53ef\u8996\u5316\u3001\u30e1\u30e2\u3001\u304c\u4e00\u5143\u7ba1\u7406\u3067\u304d\u308b\u74b0\u5883\u3002<\/li>\n<\/ul>\n<p>Python\u30d1\u30c3\u30b1\u30fc\u30b8:<\/p>\n<pre class=\"example\">import pandas as pd\r\nimport numpy as np\r\nfrom pandas import DataFrame\r\nfrom sklearn.linear_model import LogisticRegression\r\nfrom sklearn.metrics import accuracy_score\r\nimport csv\r\n<\/pre>\n<\/div>\n<\/div>\n<div id=\"outline-container-sec-4\" class=\"outline-2\">\n<p>&nbsp;<\/p>\n<h3 id=\"sec-4\"><span class=\"section-number-2\">4<\/span> \u30c7\u30fc\u30bf\u8aad\u307f\u8fbc\u307f<\/h3>\n<div id=\"text-4\" 
class=\"outline-text-2\">train.csv\u3001test.csv\u306fKaggle\u30b5\u30a4\u30c8\u304b\u3089\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u3001\u9069\u5f53\u306a\u5834\u6240\u306b\u4fdd\u5b58\u3059\u308b\u3002<\/p>\n<pre class=\"example\"># \u8a13\u7df4\u30c7\u30fc\u30bf\u306e\u8aad\u307f\u8fbc\u307f\r\ndf_train = pd.read_csv('~\/Devel\/Kaggle\/Titanic\/train.csv')\r\n# \u30c6\u30b9\u30c8\u30c7\u30fc\u30bf\u306e\u8aad\u307f\u8fbc\u307f\r\ndf_test = pd.read_csv('~\/Devel\/Kaggle\/Titanic\/test.csv')\r\n<\/pre>\n<\/div>\n<\/div>\n<div id=\"outline-container-sec-5\" class=\"outline-2\">\n<p>&nbsp;<\/p>\n<h3 id=\"sec-5\"><span class=\"section-number-2\">5<\/span> \u6b20\u640d\u30c7\u30fc\u30bf\u306e\u88dc\u5b8c<\/h3>\n<div id=\"text-5\" class=\"outline-text-2\">\u5e74\u9f62\u30c7\u30fc\u30bf\u306b\u6b20\u640d\u304c\u5b58\u5728\u3059\u308b\u305f\u3081\u3001\u9069\u5f53\u306a\u5024\u3067\u88dc\u5b8c\u3059\u308b\u3002<br \/>\n\u4eca\u56de\u306f\u6027\u5225\u6bce\u306e\u5e73\u5747\u5024\u3067\u88dc\u5b8c\u3002<\/p>\n<pre class=\"example\"># \u8a13\u7df4\u30c7\u30fc\u30bf\u306e\u6b20\u640d\u3092\u88dc\u5b8c\r\nage_mean_train = df_train.groupby('Sex').Age.mean()\r\ndf_train.Age.fillna(df_train[df_train.Age.isnull()].apply(lambda x: age_mean_train[x.Sex],axis=1), inplace=True)\r\n# \u30c6\u30b9\u30c8\u30c7\u30fc\u30bf\u306e\u6b20\u640d\u3092\u88dc\u5b8c\r\nage_mean_test = df_test.groupby('Sex').Age.mean()\r\ndf_test.Age.fillna(df_test[df_test.Age.isnull()].apply(lambda x: age_mean_test[x.Sex],axis=1),inplace=True)\r\n<\/pre>\n<p>\u203b\u30c7\u30fc\u30bf\u6b20\u640d\u306f\u4ee5\u4e0b\u3067\u78ba\u8a8d\u51fa\u6765\u308b\u3002<br \/>\n\u203b\u5e74\u9f62\u4ee5\u5916\u306b\u3082\u6b20\u640d\u30c7\u30fc\u30bf\u306f\u5b58\u5728\u3059\u308b\u304c\u3001\u4eca\u56de\u306f\u5206\u6790\u306b\u4f7f\u7528\u3057\u306a\u3044<\/p>\n<pre class=\"example\">df_train.isnull().sum()\r\n<\/pre>\n<\/div>\n<\/div>\n<div id=\"outline-container-sec-6\" class=\"outline-2\">\n<p>&nbsp;<\/p>\n<h3 
id=\"sec-6\"><span class=\"section-number-2\">6<\/span> \u30c0\u30df\u30fc\u5909\u6570\u5316<\/h3>\n<div id=\"text-6\" class=\"outline-text-2\">\u6570\u5024\u3067\u306a\u3044\u30c7\u30fc\u30bf\u304c\u542b\u307e\u308c\u3066\u3044\u308b\u305f\u3081\u3001\u6271\u3044\u3084\u3059\u3044\u3088\u3046\u306b\u6570\u5024\u30c7\u30fc\u30bf\u306b\u7f6e\u304d\u63db\u3048\u308b\u3002<\/p>\n<pre class=\"example\">df_train['Female'] = df_train['Sex'].map( {'male': 0, 'female': 1} ).astype(int)\r\ndf_test['Female'] = df_test['Sex'].map( {'male': 0, 'female': 1} ).astype(int)\r\npclass_df_train  = pd.get_dummies(df_train['Pclass'],prefix='Class')\r\npclass_df_test  = pd.get_dummies(df_test['Pclass'],prefix='Class')\r\ndf_train = df_train.join(pclass_df_train)\r\ndf_test = df_test.join(pclass_df_test)\r\n<\/pre>\n<\/div>\n<\/div>\n<div id=\"outline-container-sec-7\" class=\"outline-2\">\n<p>&nbsp;<\/p>\n<h3 id=\"sec-7\"><span class=\"section-number-2\">7<\/span> \u30e2\u30c7\u30eb\u751f\u6210\u3068\u5b66\u7fd2<\/h3>\n<div id=\"text-7\" class=\"outline-text-2\">2\u5024\u5206\u985e\u30e2\u30c7\u30eb\u306e\u4e00\u3064\u3067\u3042\u308b\u30ed\u30b8\u30b9\u30c6\u30a3\u30c3\u30af\u56de\u5e30\u3092\u9069\u7528\u3002<\/p>\n<pre class=\"example\"># \u4eca\u56de\u4f7f\u7528\u3057\u306a\u3044\u30c7\u30fc\u30bf\u306f\u30c9\u30ed\u30c3\u30d7\r\nX = df_train.drop(['PassengerId','Survived','Pclass','Name','Sex','SibSp','Parch','Ticket','Fare','Cabin','Embarked'],axis=1)\r\n# \u671f\u5f85\u5024\r\ny = df_train.Survived\r\n# \u30e2\u30c7\u30eb\u306e\u751f\u6210\r\nclf = LogisticRegression()\r\n# \u5b66\u7fd2\u5b9f\u884c\r\nclf.fit(X, y)\r\n<\/pre>\n<\/div>\n<\/div>\n<div id=\"outline-container-sec-8\" class=\"outline-2\">\n<p>&nbsp;<\/p>\n<h3 id=\"sec-8\"><span class=\"section-number-2\">8<\/span> \u4e88\u6e2c<\/h3>\n<div id=\"text-8\" class=\"outline-text-2\">\u30c6\u30b9\u30c8\u30c7\u30fc\u30bf\u3092\u751f\u6210\u3057\u305f\u30e2\u30c7\u30eb\u3067\u4e88\u6e2c\u3059\u308b\u3002<\/p>\n<pre 
class=\"example\">df_test_in = df_test.drop(['PassengerId','Pclass','Name','Sex','SibSp','Parch','Ticket','Fare','Cabin','Embarked'],axis=1)\r\ntest_predict = clf.predict(df_test_in)\r\n<\/pre>\n<\/div>\n<\/div>\n<div id=\"outline-container-sec-9\" class=\"outline-2\">\n<p>&nbsp;<\/p>\n<h3 id=\"sec-9\"><span class=\"section-number-2\">9<\/span> \u7d50\u679c\u3092\u30d5\u30a1\u30a4\u30eb\u51fa\u529b<\/h3>\n<div id=\"text-9\" class=\"outline-text-2\">Kaggle\u3078Submit\u3059\u308b\u305f\u3081\u306b\u3001\u7d50\u679c\u3092\u30d5\u30a1\u30a4\u30eb\u51fa\u529b\u3059\u308b\u3002<\/p>\n<pre class=\"example\">with open(\"predict_result_data.csv\", \"w\") as f:\r\n    writer = csv.writer(f, lineterminator='\\n')\r\n    writer.writerow([\"PassengerId\", \"Survived\"])\r\n    for pid, survived in zip(df_test['PassengerId'], test_predict):\r\n        writer.writerow([pid, survived])\r\n<\/pre>\n<p>&nbsp;<\/p>\n<p>Kaggle\u306bSubmit\u3057\u305f\u3068\u3053\u308d\u3001\u4e88\u6e2c\u7cbe\u5ea6\u306f75.598%\u3067\u3057\u305f\u3002(8136\u4f4d\/9650)<\/p>\n<\/div>\n<\/div>\n<\/div>\n<div id=\"postamble\" class=\"status\">\n<p class=\"author\">Author: xxx<\/p>\n<p class=\"date\">Created: 2017-12-11 Mon 01:47<\/p>\n<p class=\"creator\"><a href=\"http:\/\/www.gnu.org\/software\/emacs\/\">Emacs<\/a> 25.3.1 (<a href=\"http:\/\/orgmode.org\">Org<\/a> mode 8.2.10)<\/p>\n<p class=\"validation\"><a href=\"http:\/\/validator.w3.org\/check?uri=referer\">Validate<\/a><\/p>\n<\/div>\n","protected":false},"excerpt":{"rendered":"<p>Table of Contents 1. Kaggle\u3068\u306f 2. Titanic\u30c1\u30e5\u30fc\u30c8\u30ea\u30a2\u30eb 3. \u74b0\u5883 4. \u30c7\u30fc\u30bf\u8aad\u307f\u8fbc\u307f 5. \u6b20\u640d\u30c7\u30fc\u30bf\u306e\u88dc\u5b8c 6. \u30c0\u30df\u30fc\u5909\u6570\u5316 7. \u30e2\u30c7\u30eb\u751f\u6210\u3068\u5b66\u7fd2 8. \u4e88\u6e2c 9. 
\u7d50\u679c [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":2771,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"om_disable_all_campaigns":false,"_monsterinsights_skip_tracking":false,"_monsterinsights_sitenote_active":false,"_monsterinsights_sitenote_note":"","_monsterinsights_sitenote_category":0,"vkexunit_cta_each_option":"","footnotes":""},"categories":[6],"tags":[],"class_list":["post-3015","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-engineer"],"aioseo_notices":[],"_links":{"self":[{"href":"https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/wp-json\/wp\/v2\/posts\/3015","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/wp-json\/wp\/v2\/comments?post=3015"}],"version-history":[{"count":2,"href":"https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/wp-json\/wp\/v2\/posts\/3015\/revisions"}],"predecessor-version":[{"id":8001,"href":"https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/wp-json\/wp\/v2\/posts\/3015\/revisions\/8001"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/wp-json\/wp\/v2\/media\/2771"}],"wp:attachment":[{"href":"https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/wp-json\/wp\/v2\/media?parent=3015"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/wp-json\/wp\/v2\/categories?post=3015"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.ois-yokohama.co.jp\/oisblog2018\/wp-json\/wp\/v2\/tags?post=3015"}],"curies":[{"name":"wp","href":"https:\/\/api.w.or
g\/{rel}","templated":true}]}}