====== 数据挖掘 ======

在线课程:[[https://www.edx.org/|edX]] [[https://www.coursera.org/|Coursera]]\\

===== 教程 =====

[[http://statweb.stanford.edu/~tibs/ElemStatLearn/]]\\

===== R语言 =====

[[http://www.rdatamining.com/]]\\

===== Gephi =====

教程:[[https://www.udemy.com/gephi/]]\\

===== 算法 =====

==== SMOTE ====

<code rsplus>
# ---- SMOTE algorithm ----
# Rebalance the classes of `have` in compare.csv with SMOTE, inspect the
# result, and save it to newd.csv.
library(DMwR)

hds <- read.csv("compare.csv")
hds <- hds[, 2:12]            # keep columns 2 through 12 only
hds$have <- factor(hds$have)  # class column used on the formula's left side

newd <- SMOTE(have ~ ., hds, perc.over = 300, perc.under = 150)
table(newd$have)              # class counts after resampling

# Before/after scatter plots. The earlier version indexed `data` and
# `newData`, which this script never creates; plot `hds` and `newd` instead,
# using the first two non-class columns and `have` for the point symbol.
# NOTE(review): assumes those two columns are numeric - confirm.
par(mfrow = c(1, 2))
xy <- setdiff(names(hds), "have")[1:2]
plot(hds[[xy[1]]], hds[[xy[2]]],
     pch = 19 + as.integer(hds$have), main = "Original Data")
plot(newd[[xy[1]]], newd[[xy[2]]],
     pch = 19 + as.integer(newd$have), main = "SMOTE'd Data")

# Save the resampled data. The earlier `newd <- write.csv("newd.csv")` wrote
# the file name itself as data and replaced `newd` with NULL.
write.csv(newd, "newd.csv")

# ---- ROC curves ----
library(Daim)

rcut <- read.csv("rcut.csv")
radd <- read.csv("radd.csv")
rsmote <- read.csv("rsmote.csv")

# In each file column 2 is passed as the first argument and column 1 as the
# second; "1" is passed as the third.
# NOTE(review): presumably score, true label, positive label - confirm
# against the Daim `roc` documentation.
roc_cut <- roc(rcut[, 2], rcut[, 1], "1")
roc_add <- roc(radd[, 2], radd[, 1], "1")
roc_smote <- roc(rsmote[, 2], rsmote[, 1], "1")

par(mfrow = c(2, 2))

# Panel 1: all three sampling schemes on one plot. The earlier version drew
# the SMOTE curve twice and never drew the rcut curve.
plot(roc_cut, main = "不同抽样算法比较", all.roc = TRUE)
plot(roc_add, col = "green", add = TRUE)
plot(roc_smote, col = "red", add = TRUE)
# Legend labels now name the curves instead of bootstrap estimators.
# NOTE(review): "blue" assumes the first curve uses a blue default - confirm.
legend("bottomright", c("SMOTE", "欠抽样", "过抽样"),
       col = c("red", "blue", "green"), lty = 1, inset = 0.01)

# Panels 2 and 3: one curve per panel.
plot(roc_cut, method = "欠抽样", legend = TRUE)
plot(roc_add, method = "过抽样")
# NOTE(review): these labels look copied from a Daim bootstrap example and
# are drawn on a single-curve panel - confirm what was intended.
legend("bottomright", c("0.632+", "0.632", "loob"),
       col = c("red", "blue", "green"), lty = 1, inset = 0.01)

# Several score columns (2 to 4) against the labels in column 1 of roc.csv.
library(Daim)

rroc <- read.csv("roc.csv")
roc_multi <- roc(rroc[, 2:4], rroc[, 1], "1")
# NOTE(review): verify that roc.area accepts raw scores and labels here
# rather than the object returned by roc().
auc_multi <- roc.area(rroc[, 2:4], rroc[, 1], "1")
plot(roc_multi, color = c("black", "blue", "red"))

# Same SMOTE run as at the top, without the plots.
library(DMwR)

hds <- read.csv("compare.csv")
hds <- hds[, 2:12]
hds$have <- factor(hds$have)
newd <- SMOTE(have ~ ., hds, perc.over = 300, perc.under = 150)
table(newd$have)
write.csv(newd, "newd.csv")
</code>