Advertisement

R语言 多元线性回归 研究年龄、身高、体重的关系

阅读量:
  • 0-20岁数据分析
复制代码
    # Analysis of the 0-20 age range: explore how age, height (cm) and
    # weight (kg) relate, then fit weight against the cube of height.
    # The pipe, as_tibble() and ggplot() used below all come from tidyverse.
    library(tidyverse)

    # NOTE(review): assumes kg.txt is tab-separated with a header row that
    # provides the columns age, cm and kg used below -- confirm against the
    # data file.
    data <- read.table("e://kg.txt",
                       header = TRUE,
                       sep = "\t") %>%
      as_tibble()

    # Pairwise line plots of the three variables.
    data %>% ggplot(aes(cm, kg)) + geom_line()
    data %>% ggplot(aes(age, cm)) + geom_line()
    data %>% ggplot(aes(age, kg)) + geom_line()

    # Relationship between age, height and weight: correlation heat map.
    # NOTE(review): data[1:3] presumes the first three columns are the
    # numeric age / cm / kg columns -- confirm.
    data[1:3] %>%
      cor() %>%
      corrplot::corrplot(method = "color", addCoef.col = "grey")

    # Linear model of weight on height cubed.
    lm_data <- lm(kg ~ I(cm^3), data = data)
    lm_data %>% summary()
    lm_data

    # Base-graphics view of the fit. with() evaluates the column names inside
    # `data`, so nothing has to be attach()ed to the search path. The x axis
    # is drawn manually so that ticks sit at cm^3 but are labelled in cm.
    with(data, plot(cm^3, kg, xaxt = "n"))
    with(data, axis(1, at = cm^3, labels = cm))
    abline(lm_data)

    # The same scatter plot in ggplot2 with a smoothed fit.
    data %>% ggplot(aes(cm^3, kg)) +
      geom_point() +
      geom_smooth()
    
    
![](https://ad.itadn.com/c/weblog/blog-img/images/2025-07-13/JhS23TqLieuzoIZyxRnt0DCKjbQr.png)
  • 分性别数据分析
复制代码
    # Height / weight analysis split by gender.
    # References:
    # https://zhuanlan.zhihu.com/p/94372177
    # https://www.jianshu.com/p/a081a791ae03
    # https://cloud.tencent.com/developer/article/1674211
    # https://www3.nd.edu/~steve/computing_with_data/2_Motivation/motivate_ht_wt.html?spm=a2c4e.11153940.blogcont603256.20.333b1d6fYOsiOK
    # Load the data; the data set can be downloaded here: https://github.com/johnmyleswhite/ML_for_Hackers/blob/master/02-Exploration/data/01_heights_weights_genders.csv
    library(tidyverse)
    # NOTE(review): the file is read as tab-separated text with a header row;
    # the code below uses the columns Gender, Height and Weight -- confirm the
    # local copy matches.
    ht_weight_df <- read.table("e://01_heights_weights_genders.txt",
                               header = TRUE,
                               sep = "\t") %>%
      as_tibble()
    # Show the missing-data pattern.
    ht_weight_df %>% mice::md.pattern()

    # Plot to inspect the correlation. select(-1) drops the first column.
    # NOTE(review): presumably the first column is Gender, leaving only the
    # numeric columns -- confirm.
    ht_weight_df %>%
      select(-1) %>%
      cor() %>%
      corrplot::corrplot(method = "color", addCoef.col = "grey")
    # Scatter plot of a random 10% sample of the rows.
    ht_weight_df %>%
      select(-1) %>%
      slice_sample(prop = 0.1) %>%
      plot(cex = 0.1)

    # Fit a linear model to check the linear relationship, and add the fitted
    # line to the plot drawn just above.
    lm_ht_weight <- lm(Weight ~ Height, data = ht_weight_df)
    lm_ht_weight %>% summary()
    abline(lm_ht_weight)

    # Compare by gender: rounded mean of Height * 2.54 per group.
    # NOTE(review): the 2.54 factor suggests Height is in inches and the
    # result is in centimetres -- confirm the unit.
    ht_weight_df %>%
      group_by(Gender) %>%
      dplyr::summarise(mean_height_cm = round(mean(Height) * 2.54))
      # Author's notes on alternatives (translated as written):
      # subset(Gender == ) can also be used to pick a group
      # fivenum() does not work with [2] or select(2)
      # sapply() does not work with $variable or select(2)
      # psych::describe() does not work with [2]
      # pastecs::stat.desc(), Hmisc::describe() and summary() all work
      # plyr::ddply(.(Gender), function(df) summary(df$Height)) computes the
      #   grouped values from the original data

    # Inspect the distributions.
    # Extract each gender's heights once and reuse them for both the density
    # curves and the mean reference lines.
    male_height <- ht_weight_df %>%
      filter(Gender == "Male") %>%
      pull(Height)
    female_height <- ht_weight_df %>%
      filter(Gender == "Female") %>%
      pull(Height)
    male_density <- density(male_height)
    female_density <- density(female_height)

    par(mfrow = c(1, 1))
    # The axis limits cover both densities so that the second curve is not
    # clipped to the range of the first. ann = FALSE suppresses the default
    # annotation, so the title is added separately below.
    plot(male_density,
         type = "h", col = 4, ann = FALSE,
         xlim = range(male_density$x, female_density$x),
         ylim = range(0, male_density$y, female_density$y))
    lines(female_density, col = 2)
    title(main = "Height By Gender")
    # Dotted vertical lines at each group's mean, in the same colour as that
    # group's density curve (4 = male, 2 = female).
    abline(v = c(mean(male_height), mean(female_height)),
           col = c(4, 2),
           lty = 3)

    # The same comparison in ggplot2: density curves and per-gender QQ plots.
    ht_weight_df %>% ggplot(aes(x = Height, colour = Gender)) +
      geom_density()
    ht_weight_df %>% ggplot(aes(sample = Height)) +
      geom_point(stat = "qq") + facet_wrap(~Gender) # stat_qq requires sample

    # Linear fit per category: one regression line per gender, then a model
    # with a Height-by-Gender interaction.
    ht_weight_df %>% ggplot(aes(x = Height, y = Weight, colour = Gender)) +
      geom_point(alpha = 0.2) +
      geom_smooth(method = "lm", formula = y ~ x)
    lm_ht_wt_by_gender <- lm(Weight ~ Height * Gender, data = ht_weight_df)
    lm_ht_wt_by_gender %>% summary()
    
    
![](https://ad.itadn.com/c/weblog/blog-img/images/2025-07-13/CyeOa3VZu0WvhbpFHlq4JdkB6tAo.png)
  • 如果观察人一生中身高、体重的变化曲线,会是什么样的呢?

全部评论 (0)

还没有任何评论哟~