# Advertisement

# R语言的dplyr应用

# 阅读量:
####### Some dplyr
# dplyr provides the data-manipulation verbs used throughout this script.
library(dplyr)
# hflights supplies the example data set. Install it only when it is missing,
# so re-running the script does not reinstall the package (and hit the
# network) every time.
if (!requireNamespace("hflights", quietly = TRUE)) {
  install.packages("hflights")
}
library(hflights)
# Work on a plain data.frame copy of the hflights data.
flights <- as.data.frame(hflights)
### Filter function####
# Keep only the rows whose carrier code is "AA" or "UA".
flights %>%
  filter(UniqueCarrier %in% c("AA", "UA"))
### Select#####
# Pick three columns by name.
flights %>%
  select(DepTime, ArrTime, FlightNum)
# Pick the Year..DayofMonth column range plus every column whose name
# contains "Taxi".
flights %>%
  select(Year:DayofMonth, contains("Taxi"))
### Select and Filter#####
# Carrier and departure delay, restricted to rows with DepDelay above 60.
flights %>%
  select(UniqueCarrier, DepDelay) %>%
  filter(DepDelay > 60)
### arrange#####
# Same two columns, ordered from the largest DepDelay to the smallest.
flights %>%
  select(UniqueCarrier, DepDelay) %>%
  arrange(desc(DepDelay))
### Mutate#####
# Add a speed column computed as Distance / AirTime * 60 and store the
# result back into flights.
flights <- mutate(flights, speed = Distance / AirTime * 60)
### Summarise#####
# Helper functions used inside summarise() in the examples that follow:
#   n()            - number of rows in the current group
#   n_distinct(x)  - number of distinct values of x
# They are listed as comments only: n() raises an error when called outside
# a dplyr verb, so a bare call here would stop the script.

# Mean arrival delay per destination, ignoring missing ArrDelay values.
flights %>%
  group_by(Dest) %>%
  summarize(ave_delay = mean(ArrDelay, na.rm = TRUE))
## Summarize mean to multiple columns
# across() replaces the deprecated summarise_each()/funs() combination.
# With a single function the output columns keep their original names.
flights %>%
  group_by(UniqueCarrier) %>%
  summarise(across(c(Cancelled, Diverted), mean))
## Summarize min and max to multiple columns by match function to select columns
# Every column whose name matches "Delay" gets a <column>_min and a
# <column>_max result; missing values are dropped before computing each one.
flights %>%
  group_by(UniqueCarrier) %>%
  summarise(across(
    matches("Delay"),
    list(
      min = function(x) min(x, na.rm = TRUE),
      max = function(x) max(x, na.rm = TRUE)
    )
  ))
## Count n()
# Number of rows for each (Month, DayofMonth) pair, largest count first.
flights %>%
  group_by(Month, DayofMonth) %>%
  summarize(flight_count = n()) %>%
  arrange(desc(flight_count))
## Count n_distinct()
# Per destination: number of rows and number of distinct TailNum values.
flights %>%
  group_by(Dest) %>%
  summarize(
    flight_count = n(),
    plane_count = n_distinct(TailNum)
  )
## Group without summarising
# select() on a grouped data frame keeps the grouping column, so table()
# receives both Dest and Cancelled and cross-tabulates them.
flights %>%
  group_by(Dest) %>%
  select(Cancelled) %>%
  table()
#### Window function #######
## for each carrier, which 2 days of the year had the longest departure delay
# min_rank(desc(DepDelay)) ranks rows within each carrier from the largest
# delay down; keeping ranks <= 2 retains the top two (ties included).
flights %>%
  group_by(UniqueCarrier) %>%
  select(Month, DayofMonth, DepDelay) %>%
  filter(min_rank(desc(DepDelay)) <= 2) %>%
  arrange(UniqueCarrier, desc(DepDelay))
## using top_n function
# Same question answered with top_n(). The ranking column is named
# explicitly: without it top_n() falls back to the last column of the data
# and prints a "Selecting by ..." message, which is fragile if the select()
# above is ever reordered.
flights %>%
  group_by(UniqueCarrier) %>%
  select(Month, DayofMonth, DepDelay) %>%
  top_n(2, DepDelay) %>%
  arrange(UniqueCarrier, desc(DepDelay))
## Calculate the month diff from last month
# Count rows per Month, then subtract the previous row's count from each
# row's count; the first row has no predecessor, so lag() yields NA there.
flights %>%
  group_by(Month) %>%
  summarize(flight_count = n()) %>%
  mutate(change = flight_count - lag(flight_count))
#### Other function############
## Sample data
# Five randomly chosen rows.
flights %>%
  sample_n(size = 5)
# A random 25% of the rows, drawn without replacement.
flights %>%
  sample_frac(size = 0.25, replace = FALSE)
## rename
# Expose the TailNum column under the name tail.
flights %>%
  rename(tail = TailNum)
## Sort dep_delay in each group
# Keep the three largest DepDelay rows for each (Month, DayofMonth) pair,
# then order the result by descending DepDelay. arrange() ignores the
# grouping unless .by_group = TRUE, so this final ordering spans all groups.
flights %>%
  group_by(Month, DayofMonth) %>%
  top_n(3, DepDelay) %>%
  arrange(desc(DepDelay))
#### reshape2
# Package names are case-sensitive: the package is "reshape2", so
# library(Reshape2) fails with "there is no package called 'Reshape2'".
library(reshape2)
# Main reshaping functions, listed as comments because both require
# arguments and a bare call raises an error:
#   melt(data, id.vars = ...)              - wide format to long format
#   dcast(data, formula, value.var = ...)  - long format to wide format
#### ggplot2###############################
# The plotting examples need ggplot2 attached; nothing earlier in the
# script loads it.
library(ggplot2)
### Average age for each occupation
# NOTE(review): `marketing` is not defined in this script; it is assumed to
# be a data frame with at least `job` and `age` columns.
# stat = "summary" with fun = "mean" draws one bar per job at the mean age.
# `fun` replaces `fun.y`, which was deprecated in ggplot2 3.3.0.
ggplot(marketing, aes(job, age)) +
  geom_bar(
    stat = "summary", fun = "mean",
    color = "black", fill = "grey", width = 0.5
  ) +
  theme_bw() +
  labs(y = "Age", title = "Age Distribution") +
  # hjust = 0.5 centres the title (and a subtitle, if one is added).
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
## Average age for each occupation with y fill
# Same mean-age bars, with the bar fill mapped to the `y` column.
# `fun` replaces `fun.y`, which was deprecated in ggplot2 3.3.0.
ggplot(marketing, aes(job, age, fill = y)) +
  geom_bar(stat = "summary", fun = "mean", width = 0.5) +
  theme_bw() +
  labs(y = "Age", title = "Age Distribution")
### density plot
# Density curve of weight with a dashed blue vertical line at its mean.
p <- ggplot(df, aes(x = weight)) +
  geom_density() +
  geom_vline(
    aes(xintercept = mean(weight)), # Mean line
    color = "blue", linetype = "dashed", size = 1
  )
# add density line by groups by color
# One density curve per sex, plus a dashed vertical line per group taken
# from the separate data frame mu.
p <- ggplot(df, aes(x = weight, color = sex)) +
  geom_density() +
  geom_vline(
    data = mu, # mu is mean for each group
    aes(xintercept = mean, color = sex),
    linetype = "dashed"
  )
## scatter plot
# color with scatter plot
# Points of b against a, coloured by a. The ggplot() call needs its closing
# parenthesis and a trailing `+` so the layers below attach to the plot;
# without them the script does not parse.
ggplot(data, aes(a, b, color = a)) +
  geom_point(shape = 16, size = 5, show.legend = FALSE) +
  theme_minimal()
# color and transparent alpha
# Semi-transparent points coloured by pc on a gradient running from
# "#0091ff" (low) to "#f0650e" (high).
ggplot(d, aes(a, b, color = pc)) +
  geom_point(
    shape = 16,
    size = 5,
    alpha = 0.4,
    show.legend = FALSE
  ) +
  theme_minimal() +
  scale_color_gradient(low = "#0091ff", high = "#f0650e")
## bar chart
# Mean age per job, filled by y, with one panel per marital value.
# The `+` after facet_wrap() is required: without it the plot expression
# ends there and theme_bw() + labs() is evaluated as a separate statement
# that never reaches the plot.
# `fun` replaces `fun.y`, which was deprecated in ggplot2 3.3.0.
ggplot(marketing, aes(job, age, fill = y)) +
  geom_bar(stat = "summary", fun = "mean", width = 0.5) +
  facet_wrap(~marital) +
  theme_bw() +
  labs(y = "Age", title = "Age Distribution")
# color with RColorBrewer for stacked percent bar
# position = "fill" stacks the bars and scales each one to full height, so
# the segments show proportions. Fixes the `aex` typo (the function is
# aes()) and closes the unterminated "Set1" string.
ggplot(data, aes(fill = condition, y = value, x = specie)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_brewer(palette = "Set1")
# Grouped chart
# position = "dodge" places the bars of each condition side by side. The
# argument was misspelled as `postion`, so it was ignored and the bars were
# stacked instead.
ggplot(data, aes(fill = condition, y = value, x = specie)) +
  geom_bar(position = "dodge", stat = "identity")
# Faceting#
# One panel per condition; bars are outlined and filled by specie.
ggplot(
  data,
  aes(x = specie, y = value, color = specie, fill = specie)
) +
  geom_bar(stat = "identity") +
  facet_wrap(~condition)

# 全部评论 (0)

# 还没有任何评论哟~