Advertisement

机器学习 knn AdaBoost xgboost LGBM 的用户信用违约预测 完整代码数据

阅读量:

项目展示:机器学习中的k-近邻算法、AdaBoost、XGBoost与LightGBM的用户信用违约预测及完整的代码与数据集_哔哩哔哩

结果演示:

复制代码
 import random

    
 from sklearn.model_selection import train_test_split
    
 from sklearn.preprocessing import MinMaxScaler
    
 from sklearn import preprocessing
    
 from datetime import datetime
    
 import time
    
 import math
    
 from matplotlib import pyplot
    
 from sklearn.model_selection import train_test_split
    
 from scipy import stats, integrate
    
 import seaborn as sns
    
 import matplotlib.pyplot as plt
    
 from matplotlib.font_manager import FontProperties
    
 import numpy as np
    
 import pandas as pd
    
 import  matplotlib.pyplot as plt
    
 from sklearn.model_selection import KFold
    
 from sklearn.metrics import classification_report
    
 from sklearn.model_selection import learning_curve
    
 from sklearn.model_selection import  validation_curve
    
 from sklearn.model_selection import cross_val_score
    
 from sklearn.preprocessing import StandardScaler
    
 from sklearn.decomposition import PCA
    
 from sklearn.metrics import confusion_matrix
    
 from sklearn.model_selection import train_test_split
    
 from sklearn import metrics
    
 from sklearn.svm import SVR,SVC
    
 from sklearn.neighbors import KNeighborsClassifier
    
 from sklearn.metrics import mean_squared_error,mean_absolute_error  # 评价指标
    
 from sklearn.metrics import f1_score,roc_auc_score,accuracy_score
    
 from sklearn.linear_model import LogisticRegression
    
 plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
 plt.rcParams['axes.unicode_minus'] = False    # render the minus sign correctly

 # Load the user-credit dataset; 'y' is the default/no-default label column.
 data = pd.read_csv("用户信息.csv")
 # BUG FIX: the original called data.dropna(axis=0, how='any') and discarded the
 # result (dropna is not in-place), so it was a no-op. The effective behavior,
 # preserved here, is simply filling missing values with 0.
 data = data.fillna(0)
 print(data.columns[:25])  # expected: ['user_id', 'y', 'x_001', ..., 'x_020']
 print(data)

 # Keep only the label and the 20 feature columns x_001..x_020.
 feature_cols = ['x_%03d' % i for i in range(1, 21)]
 data = data[['y'] + feature_cols]

 data_y = data['y'].values
 data_x = data[feature_cols].values

 # 80/20 train/test split. NOTE(review): no random_state is set, so the split
 # (and every score below) varies between runs — confirm whether that is intended.
 x_train, x_test, y_train, y_test = train_test_split(
     np.array(data_x), np.array(data_y), test_size=0.2)

 # Spearman correlation heatmap of the label and all features.
 plt.subplots(figsize=(16, 16))
 sns.heatmap(data.corr(method='spearman').round(5), annot=True)
 plt.show()
    
 # 设置Axes的标题
    
  
    
 # Accumulators for per-model accuracy and F1 scores, consumed later by the
 # bar-chart comparison (append order must match the chart's label order).
 data_acc = []
 data_f1 = []

 # --- k-nearest neighbours classifier ---
 knn = KNeighborsClassifier()
 knn.fit(x_train, y_train)
 test_pred = knn.predict(x_test)  # predict on the held-out test set
 print("knn算法----------------------------------------- ")
 print(test_pred[:10])
 print(y_test[:10])
 # FIX: sklearn's convention is metric(y_true, y_pred); the original passed the
 # arguments reversed. Accuracy and binary F1 are symmetric in their arguments,
 # so the printed values are unchanged — this is a consistency/readability fix.
 print("accuracy_score:", accuracy_score(y_test, test_pred))
 print("f1_score:", f1_score(y_test, test_pred))
 data_acc.append(accuracy_score(y_test, test_pred))
 data_f1.append(f1_score(y_test, test_pred))
    
  
    
  
    
 # --- Support-vector classifier ---
 # Fit on the training split, predict the held-out split, then record the
 # scores so the comparison charts can pick them up later.
 svm = SVC()
 svm.fit(x_train, y_train)
 test_pred = svm.predict(x_test)
 print("svm算法 ")
 print(test_pred[:10])
 print(y_test[:10])
 svm_acc = accuracy_score(test_pred, y_test)
 svm_f1 = f1_score(test_pred, y_test)
 print("accuracy_score:", svm_acc)
 print("f1_score:", svm_f1)
 data_acc.append(svm_acc)
 data_f1.append(svm_f1)
    
  
    
  
    
 # --- Random forest classifier ---
 from sklearn.ensemble import RandomForestClassifier
 # FIX: the original bound this model to the name `svm`, which is misleading;
 # a descriptive name changes nothing at runtime (the name is reassigned fresh
 # by every model section).
 rf = RandomForestClassifier()
 rf.fit(x_train, y_train)
 test_pred = rf.predict(x_test)
 print("RandomForest算法 ")
 print(test_pred[:10])
 print(y_test[:10])
 # Metric arguments in sklearn's (y_true, y_pred) order; values are identical
 # to the reversed order for accuracy and binary F1.
 print("accuracy_score:", accuracy_score(y_test, test_pred))
 print("f1_score:", f1_score(y_test, test_pred))
 data_acc.append(accuracy_score(y_test, test_pred))
 data_f1.append(f1_score(y_test, test_pred))
    
  
    
  
    
 # --- AdaBoost classifier ---
 from sklearn.ensemble import AdaBoostClassifier
 # FIX: renamed from the misleading `svm` used in the original (the name is
 # reassigned fresh by every model section, so this is purely a clarity fix).
 ada = AdaBoostClassifier()
 ada.fit(x_train, y_train)
 test_pred = ada.predict(x_test)
 print("AdaBoost算法 ")
 print(test_pred[:10])
 print(y_test[:10])
 # Metric arguments in sklearn's (y_true, y_pred) order; values are identical
 # to the reversed order for accuracy and binary F1.
 print("accuracy_score:", accuracy_score(y_test, test_pred))
 print("f1_score:", f1_score(y_test, test_pred))
 data_acc.append(accuracy_score(y_test, test_pred))
 data_f1.append(f1_score(y_test, test_pred))
    
  
    
  
    
 # --- XGBoost classifier ---
 from xgboost import XGBClassifier
 # FIX: renamed from the misleading `svm` used in the original.
 xgb = XGBClassifier()
 xgb.fit(x_train, y_train)
 test_pred = xgb.predict(x_test)
 # FIX: the original printed "XGBRegressor算法", but this is a classifier,
 # not a regressor — corrected the displayed label.
 print("XGBoost算法 ")
 print(test_pred[:10])
 print(y_test[:10])
 # Metric arguments in sklearn's (y_true, y_pred) order; values are identical
 # to the reversed order for accuracy and binary F1.
 print("accuracy_score:", accuracy_score(y_test, test_pred))
 print("f1_score:", f1_score(y_test, test_pred))
 data_acc.append(accuracy_score(y_test, test_pred))
 data_f1.append(f1_score(y_test, test_pred))
    
  
    
  
    
 # --- Gradient-boosting classifier ---
 from sklearn.ensemble import GradientBoostingClassifier
 # FIX: renamed from the misleading `svm` used in the original.
 gbdt = GradientBoostingClassifier()
 gbdt.fit(x_train, y_train)
 test_pred = gbdt.predict(x_test)
 print("GradientBoosting算法 ")
 print(test_pred[:10])
 print(y_test[:10])
 # Metric arguments in sklearn's (y_true, y_pred) order; values are identical
 # to the reversed order for accuracy and binary F1.
 print("accuracy_score:", accuracy_score(y_test, test_pred))
 print("f1_score:", f1_score(y_test, test_pred))
 data_acc.append(accuracy_score(y_test, test_pred))
 data_f1.append(f1_score(y_test, test_pred))
    
  
    
  
    
 # --- LightGBM classifier ---
 # FIX: the original imported LGBMClassifier twice on consecutive lines;
 # one import suffices. Also renamed the model from the misleading `svm`.
 from lightgbm import LGBMClassifier
 lgbm = LGBMClassifier()
 lgbm.fit(x_train, y_train)
 test_pred = lgbm.predict(x_test)
 print("LGB算法 ")
 print(test_pred[:10])
 print(y_test[:10])
 # Metric arguments in sklearn's (y_true, y_pred) order; values are identical
 # to the reversed order for accuracy and binary F1.
 print("accuracy_score:", accuracy_score(y_test, test_pred))
 print("f1_score:", f1_score(y_test, test_pred))
 data_acc.append(accuracy_score(y_test, test_pred))
 data_f1.append(f1_score(y_test, test_pred))
    
  
    
 # Plotting setup for the comparison charts.
 # FIX: the original re-imported matplotlib.pyplot (twice) and numpy (twice)
 # here; both are already imported at the top of this file, so only the new
 # `matplotlib as mpl` alias is kept.
 import matplotlib as mpl

 mpl.rcParams["font.sans-serif"] = ["SimHei"]  # Chinese-capable font for labels
 mpl.rcParams["axes.unicode_minus"] = False    # render the minus sign correctly
    
 def zhu_zhuang_tu(label_list, size, title_name, y_name, x_name):
     """Draw a green bar chart and display it (blocks until closed).

     Parameters
     ----------
     label_list : sequence of str
         One label per bar, e.g. ["part 1", "part 2", "part 3"].
     size : sequence of numbers
         Bar heights, e.g. [55, 35, 10]; must align with ``label_list``.
     title_name : str
         Chart title.
     y_name : str
         Y-axis label.
     x_name : str
         X-axis label.
     """
     # FIX: the original bound plt.figure() to an unused local `fig`.
     plt.figure()
     plt.bar(label_list, size, 0.5, color="green")  # 0.5 is the bar width
     plt.xlabel(x_name)
     plt.ylabel(y_name)
     plt.title(title_name)
     plt.show()
    
 # Bar-chart comparison of all seven models, in the exact order their scores
 # were appended to data_acc / data_f1 above.
 # FIX: the original duplicated this literal list verbatim for the two charts;
 # define it once. The trailing space in "xgboost " is kept byte-for-byte so
 # the rendered tick labels are unchanged.
 model_labels = ["knn", "svm", "RandomForest", "AdaBoost", "xgboost ",
                 "GradientBoosting", "LGBM"]

 zhu_zhuang_tu(model_labels, data_acc, "算法对比图", "Accuracy", "算法模型")
 zhu_zhuang_tu(model_labels, data_f1, "算法对比图", "F1_score", "算法模型")
    
    
    
    
    代码解读

完整代码数据:<>

全部评论 (0)

还没有任何评论哟~