Advertisement

37全球AI挑战赛之虚拟股票预测,冠军

阅读量:

官网(搭梯子): https://challenger.ai/

复制代码
    import pandas as pd
    from sklearn.linear_model import LogisticRegression
    from sklearn.preprocessing import Imputer
    import numpy as np
    from sklearn.decomposition import PCA
    from sklearn.ensemble import RandomForestClassifier  
    from lightgbm import LGBMClassifier
    from xgboost import XGBClassifier
    from sklearn.metrics import log_loss
    from sklearn.model_selection import cross_val_score
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.svm import SVC
    from sklearn.linear_model import LogisticRegression
复制代码
    def cvPro(train,label):
        '''
        Estimate the hold-out log loss of a default-parameter XGBClassifier.

        The data is split 70/30 with a fixed seed (random_state=0), the model is
        fitted on the 70% part, and the log loss is computed on the 30% part from
        the second column of predict_proba (the probability of class index 1).

        :param train: feature matrix accepted by train_test_split and
            XGBClassifier.fit
        :param label: labels aligned row-by-row with train (presumably binary,
            since only one probability column is scored -- confirm with caller)
        :return: the hold-out log loss; it is also printed
        '''
        x_train, x_test, y_train, y_test = train_test_split(
            train, label, test_size=0.3, random_state=0)

        ## xgb
        xgb_clf = XGBClassifier()
        xgb_clf.fit(x_train, y_train)

        # Keep only the probability column for class index 1.
        xgb_y_lr_1 = xgb_clf.predict_proba(x_test)[:, 1]

        loss = log_loss(y_test, xgb_y_lr_1)
        print("log_loss is :",loss)
        # Returned as well as printed so callers can compare configurations.
        return loss
复制代码
    def main(train_file_name,test_file_name,path):
        '''
        Train a LightGBM and an XGBoost classifier on disjoint column ranges of
        the training CSV, average their class-1 probabilities 50/50, and write
        the result as a submission CSV named "xgb_sub_05.csv".

        Original author's note (translated): lgb or xgb with default parameters,
        "0.99 44" dimensionality reduction, score 0.67447.

        :param train_file_name: path of the training CSV; must contain a
            'label' column and at least 100 columns
        :param test_file_name: path of the test CSV; must contain an 'id' column
            and at least 100 columns
        :param path: output location prefix; the file name is appended by plain
            string concatenation, so it must end with a path separator
        :return: None
        '''
        print('Starting:...')

        # Column positions: [1, median_col) feeds XGBoost and
        # [median_col, last_col) feeds LightGBM. Column 0 is skipped
        # (presumably the id column -- confirm against the data files).
        last_col = 100
        median_col = 44

        train_data = pd.read_csv(train_file_name)
        test_data = pd.read_csv(test_file_name)
        train_label = train_data['label']

        train_matrix_xgb = train_data.iloc[:,range(1,median_col)]
        test_matrix_xgb = test_data.iloc[:,range(1,median_col)]
        train_matrix_lgb = train_data.iloc[:,range(median_col,last_col)]
        test_matrix_lgb = test_data.iloc[:,range(median_col,last_col)]
        test_id = test_data['id'].astype("int64")

        # Data cleaning: missing values are replaced using Imputer's default
        # strategy. No normalisation is performed. The original also called
        # fillna(0) on these frames but discarded the results, so those calls
        # had no effect and have been removed.
        # Each imputer is fitted on the training matrix only and then applied to
        # both train and test, so the test set is filled with training
        # statistics and both matrices keep the same columns.
        # NOTE(review): `Imputer` was removed from sklearn.preprocessing in
        # scikit-learn 0.22; newer versions provide sklearn.impute.SimpleImputer.
        xgb_imputer = Imputer().fit(train_matrix_xgb)
        train_xgb_stand = xgb_imputer.transform(train_matrix_xgb)
        test_xgb_stand = xgb_imputer.transform(test_matrix_xgb)

        lgb_imputer = Imputer().fit(train_matrix_lgb)
        train_lgb_stand = lgb_imputer.transform(train_matrix_lgb)
        test_lgb_stand = lgb_imputer.transform(test_matrix_lgb)

        ## lgb model
        lgb_clf = LGBMClassifier()
        lgb_clf.fit(train_lgb_stand,train_label)
        lgb_y_lr_1 = lgb_clf.predict_proba(test_lgb_stand)[:, 1]

        ### xgb model
        xgb_clf = XGBClassifier()
        xgb_clf.fit(train_xgb_stand,train_label)
        xgb_y_lr_1 = xgb_clf.predict_proba(test_xgb_stand)[:, 1]

        # Equal-weight blend of the two models' class-1 probabilities.
        pre_data = xgb_y_lr_1 * 0.5 + lgb_y_lr_1 * 0.5

        ## Model evaluation (optional): cvPro(train_xgb_stand, train_label)
        ## prints a hold-out log loss for the XGBoost feature range.

        # Build the submission file.
        subname = "xgb_sub_05.csv"
        # .values drops the pandas index so both columns are paired by position.
        sub_data_lr = pd.DataFrame({'id': test_id.values, 'proba': pre_data})
        sub_data_lr.to_csv(path + subname,encoding='utf8',index = False)
        print('It is Ok.')
复制代码
    if __name__ == "__main__":
        # Script entry point: run the full train / predict / export pipeline
        # on the 2017-11-25 data files and write the submission into ../data/.
        path = "../data/"
        train_file_name = "../data/stock_train_data_20171125.csv"
        test_file_name = "../data/stock_test_data_20171125.csv"
        main(
            train_file_name=train_file_name,
            test_file_name=test_file_name,
            path=path,
        )

全部评论 (0)

还没有任何评论哟~