본문 바로가기

R

R - sort(), with(), within(), merge(),order() - 2020/01/14


title: "book_1"
output: html_document


# Echo the source of every chunk in the rendered document.
knitr::opts_chunk$set(echo = TRUE)
library(doBy)

# Summarise Sepal.Width and Sepal.Length separately for each Species.
doBy::summaryBy(Sepal.Width + Sepal.Length ~ Species, data = iris)

## orderBy(): sort the rows of a data frame

# order() returns the row positions that put the column in ascending order.
order(iris$Sepal.Width)

# Indexing the rows with that permutation yields the sorted data frame.
iris[order(iris$Sepal.Width), ]
# Extra keys break ties: Sepal.Length first, then Sepal.Width.
iris[order(iris$Sepal.Length, iris$Sepal.Width), ]

# orderBy() expresses the sort keys as a one-sided formula instead.
orderBy(~Sepal.Width, data = iris)
orderBy(~ Species + Sepal.Width, data = iris)


## sampleBy(): random sampling within groups

# Draw 5 values from 1..10, first without and then with replacement.
sample(1:10, size = 5)
sample(1:10, size = 5, replace = TRUE)
# Shuffle iris by drawing every row index exactly once.
iris[sample(NROW(iris), size = NROW(iris)), ]

# Draw a 10% sample of the rows within each Species.
sampleBy(~Species, frac = 0.1, data = iris)

# split(), subset(), merge()

# split() breaks the data frame into a list with one element per Species.
split(iris, iris$Species)
# Mean Sepal.Length per Species: split the column, then average each piece.
lapply(split(iris$Sepal.Length, iris$Species), mean)

# subset() keeps the rows for which the condition is TRUE.
subset(iris, Species == "setosa")

subset(iris, Species == "setosa" & Sepal.Length > 5.0)

# select = picks the columns to keep ...
subset(iris, select = c(Sepal.Length, Species))

# ... and a negated selection drops those columns instead.
subset(iris, select = -c(Sepal.Length, Species))

# The same column exclusion written with plain indexing.
iris[, !(names(iris) %in% c("Sepal.Length", "Species"))]

# merge() joins on the shared column ("name"); row order in the inputs
# does not matter.
x <- data.frame(name = c("a", "b", "c"), math = c(1, 2, 3))
y <- data.frame(name = c("c", "b", "a"), english = c(4, 5, 6))
merge(x, y)

# When the keys only partly overlap, the default keeps matching rows only.
x <- data.frame(name = c("a", "b", "c"), math = c(1, 2, 3))
y <- data.frame(name = c("c", "b", "d"), english = c(4, 5, 6))
merge(x, y)
# all = TRUE also keeps the unmatched rows from both inputs and fills the
# columns they lack with NA.
merge(x, y, all = TRUE)

# sort() and order()

x <- c(20, 11, 33, 50, 47)
# sort() returns the values themselves, ascending unless told otherwise.
sort(x)
sort(x, decreasing = TRUE)
# order() returns the positions of the values in sorted order.
order(x, decreasing = TRUE)

# Those positions can index the rows of a data frame to sort it,
# here by one key and then by two keys.
iris[order(iris$Sepal.Length), ]
iris[order(iris$Sepal.Length, iris$Petal.Length), ]

# with(), within(), attach(), detach()

# Without with(), every column reference has to be qualified with iris$.
print(mean(iris$Sepal.Length))
print(mean(iris$Sepal.Width))

# with() evaluates the expression with the data frame's columns visible
# as ordinary variables.
with(iris, {
  print(mean(Sepal.Length))
  print(mean(Sepal.Width))
})

# Show the help page only in an interactive session, so that sourcing or
# knitting this file does not try to open the help viewer.
if (interactive()) {
  print(help("with"))
}

# within() evaluates the expression like with() but returns a modified copy
# of the data frame: here each NA in val is replaced by the median of the
# non-missing values. x itself is left unchanged.
x <- data.frame(val = c(1, 2, 3, 4, NA, 5, NA))
within(x, {
  val[is.na(val)] <- median(val, na.rm = TRUE)
})