数据挖掘 [Inkit wiki]

用户工具

站点工具


stat:mining

数据挖掘

在线课程:edX Coursera

教程

R语言

Gephi

算法

SMOTE

# SMOTE algorithm ----
library(DMwR)

# Read the raw data and keep only columns 2 through 12.
hds <- read.csv("compare.csv")
hds <- hds[, 2:12]

# Turn the class column into a factor before using it as the SMOTE target.
hds$have <- factor(hds$have)
newd <- SMOTE(have ~ ., hds, perc.over = 300, perc.under = 150)

# Class counts after resampling.
table(newd$have)

# Side-by-side scatter plots of the data before and after SMOTE. The point
# symbol encodes the class (`have`).
# NOTE(review): columns 1 and 2 are plotted as in the DMwR help example this
# was adapted from -- confirm these are the two features worth showing.
old_par <- par(mfrow = c(1, 2))
plot(hds[, 1], hds[, 2], pch = 19 + as.integer(hds$have),
     main = "Original Data")
plot(newd[, 1], newd[, 2], pch = 19 + as.integer(newd$have),
     main = "SMOTE'd Data")
par(old_par)

# Save the resampled data set (data frame first, file name second).
write.csv(newd, "newd.csv")

# ROC curves ----
library(Daim)

# One result file per sampling strategy. roc() is called with column 2 as the
# first argument, column 1 as the second and "1" as the third.
# NOTE(review): presumably column 2 holds the scores, column 1 the true labels
# and "1" is the positive label -- confirm against the CSV layout.
rcut <- read.csv("rcut.csv")
radd <- read.csv("radd.csv")
rsmote <- read.csv("rsmote.csv")
r.cut <- roc(rcut[, 2], rcut[, 1], "1")
r.add <- roc(radd[, 2], radd[, 1], "1")
r.smote <- roc(rsmote[, 2], rsmote[, 1], "1")

old_par <- par(mfrow = c(2, 2))

# Panel 1: the three sampling strategies overlaid. The original drew r.smote
# twice and never drew r.cut, and its legend did not name the plotted curves.
plot(r.cut, main = "不同抽样算法比较", all.roc = TRUE, col = "blue")
plot(r.add, col = "green", add = TRUE)
plot(r.smote, col = "red", add = TRUE)
legend("bottomright", c("欠抽样", "过抽样", "SMOTE"),
       col = c("blue", "green", "red"), lty = 1, inset = 0.01)

# Panels 2 and 3: under-sampling and over-sampling curves on their own.
plot(r.cut, method = "欠抽样", legend = TRUE)
plot(r.add, method = "过抽样")
# NOTE(review): these labels look copied from the Daim bootstrap example and
# do not describe the single curve in this panel -- confirm what is intended.
legend("bottomright", c("0.632+", "0.632", "loob"),
       col = c("red", "blue", "green"), lty = 1, inset = 0.01)

# Panel 4: ROC curves computed from columns 2 to 4 of roc.csv against column 1.
rroc <- read.csv("roc.csv")
r.roc <- roc(rroc[, 2:4], rroc[, 1], "1")
r.aoc <- roc.area(rroc[, 2:4], rroc[, 1], "1")
plot(r.roc, color = c("black", "blue", "red"))

# Put the plotting layout back the way it was.
par(old_par)


library(DMwR)

# Load compare.csv, keeping columns 2 through 12 only.
hds <- read.csv("compare.csv")[, 2:12]

# The class column is converted to a factor and used as the SMOTE target.
hds$have <- factor(hds$have)
newd <- SMOTE(
  have ~ .,
  hds,
  perc.over = 300,
  perc.under = 150
)

# Print the class counts of the resampled data, then save it.
table(newd$have)

write.csv(newd, "newd.csv")
stat/mining.txt · 最后更改: 2023/04/04 18:02 由 127.0.0.1