Advertisement

Kaggle -- Titanic - Machine Learning from Disaster

阅读量:

新手 Kaggle 之旅:1. 泰坦尼克号

使用一个简单的决策树构建模型,达到了 75.8% 的准确率(准确率偏低,但这只是刚开始)

完整代码如下:

复制代码
 import pandas as pd

    
 import numpy as np
    
  
    
 # Titanic baseline: encode a handful of columns, fit a decision tree on an
 # 80/20 split of train.csv, report hold-out accuracy, then write predictions
 # for test.csv to ans.csv.
 import sklearn
 from sklearn.metrics import accuracy_score
 from sklearn.model_selection import train_test_split
 from sklearn.tree import DecisionTreeClassifier

 # Feature columns fed to the model; 'Survived' is the target column.
 label = ['Pclass', 'Sex', 'Age', 'SibSp', 'Fare', 'Embarked']

 # Integer codes for the two categorical columns. Values absent from a
 # mapping (including missing values) become NaN and are imputed later.
 SEX_CODES = {'male': 1, 'female': 2}
 EMBARKED_CODES = {'C': 1, 'Q': 2, 'S': 3}


 def encode_features(frame):
     """Return the model features of *frame* with categoricals encoded.

     Selects the columns listed in ``label`` and maps 'Sex' and 'Embarked'
     to integer codes. Works on an explicit copy so the caller's frame is
     never modified and pandas does not emit chained-assignment warnings.
     Missing values are left as NaN for the caller to impute.
     """
     features = frame[label].copy()
     features['Embarked'] = features['Embarked'].map(EMBARKED_CODES)
     features['Sex'] = features['Sex'].map(SEX_CODES)
     return features


 df = pd.read_csv("train.csv")

 # info is a method; without the call parentheses nothing is printed.
 df.info()

 # Show the first passenger before and after encoding.
 print(df.loc[0, label])
 x = encode_features(df)
 y = df['Survived']
 print(x.loc[0])

 train_x, test_x, train_y, test_y = train_test_split(
     x, y, test_size=0.2, random_state=42, shuffle=True
 )

 # Imputation values come from the training split only, so neither the
 # hold-out rows nor the Kaggle test set influence them. Numeric columns use
 # the mean; 'Embarked' holds arbitrary category codes, so its most frequent
 # value is used instead of a meaningless average.
 fill_values = train_x.mean()
 fill_values['Embarked'] = train_x['Embarked'].mode().iloc[0]

 train_x = train_x.fillna(fill_values)
 test_x = test_x.fillna(fill_values)

 # A fixed seed makes the fitted tree (and the reported score) reproducible.
 clf = DecisionTreeClassifier(random_state=42)
 clf.fit(train_x, train_y)

 y_pred = clf.predict(test_x)

 # accuracy_score expects (y_true, y_pred).
 accuracy = accuracy_score(test_y, y_pred)
 print(f"Accuracy: {accuracy * 100:.2f}%")

 res = pd.read_csv('test.csv')
 print(res.loc[0])

 # Same encoding and the same training-derived fill values as the model saw.
 res_x = encode_features(res)
 print(res_x.loc[0])
 res_x = res_x.fillna(fill_values)

 pred = clf.predict(res_x)
 print(pred[0])

 ans = res[['PassengerId']].copy()
 ans['Survived'] = pred
 print(ans.loc[0])

 # index=False keeps the file to the two columns PassengerId,Survived;
 # otherwise the DataFrame index is written as an extra unnamed column.
 ans.to_csv("ans.csv", index=False)
    
    
    
    
    
![](https://ad.itadn.com/c/weblog/blog-img/images/2025-08-17/Nkdz8O6xmbgDEJXv5eF1s92Ip30j.png)

全部评论 (0)

还没有任何评论哟~