Rのmvtnormパッケージの関数 pmvnorm() の引数 lower,upper の意味について
*R言語のmvtnormパッケージの多変数累積密度関数pmvnorm()
を使用して、2次元の正規分布関数の任意の範囲の累積確率密度(確率)を取得したいのですが、この関数の引数lower,upper(ベクトルで指定)が何を意味しているのか分かりません。この引数の意味を教えて頂きたいですm(__)m
関数pmvnorm()
のヘルプ
http://math.furman.edu/~dcs/courses/math47/R/library/mvtnorm/html/pmvnorm.html
以下実装中コードです。
library( MASS ) # MASS package(ピマ・インディアンPima のデータを使用)
library( mvtnorm ) # 多変量正規分布を扱う
# set options
options( digits=5 ) # 表示桁数
# Pima data expand on memory
data( Pima.tr )
data( Pima.te )
# copy data from Pima data
lstPimaTrain <- list(
numNoDiabetes = 0, # 糖尿病未発症人数(0で初期化)
numDiabetes = 0, # 糖尿病発症人数(0で初期化)
glu = Pima.tr$glu,
bmi = Pima.tr$bmi,
bResult = rep(FALSE, length(Pima.tr$glu)) # 糖尿病か否か?(FALSE:糖尿病でない、 TRUE:糖尿病)
)
# Pima.tr$type のデータ(Yes,No)を符号化[encoding]
for ( i in 1:length(Pima.tr$type) )
{
if(Pima.tr$type[i] == "Yes" )
{
lstPimaTrain$numDiabetes <- (lstPimaTrain$numDiabetes + 1)
lstPimaTrain$bResult[i] <- TRUE
}
else if( Pima.tr$type[i] == "No" )
{
lstPimaTrain$numNoDiabetes <- (lstPimaTrain$numNoDiabetes + 1)
lstPimaTrain$bResult[i] <- FALSE
}
else{
# Do Nothing
}
}
# sort Pima data
lstPimaTrain$glu <- lstPimaTrain$glu[ order(lstPimaTrain$bResult) ]
lstPimaTrain$bmi <- lstPimaTrain$bmi[ order(lstPimaTrain$bResult) ]
lstPimaTrain$bResult <- lstPimaTrain$bResult[ order(lstPimaTrain$bResult) ]
# split data to class C1 and C2
dfPimaTrain_C1 <- data.frame(
glu = lstPimaTrain$glu[1:lstPimaTrain$numNoDiabetes],
bmi = lstPimaTrain$bmi[1:lstPimaTrain$numNoDiabetes],
bResult = FALSE # 糖尿病か否か?(FALSE:糖尿病でない、TRUE:糖尿病)
)
dfPimaTrain_C2 <- data.frame(
glu = lstPimaTrain$glu[(lstPimaTrain$numNoDiabetes+1): (lstPimaTrain$numNoDiabetes+lstPimaTrain$numDiabetes)],
bmi = lstPimaTrain$bmi[(lstPimaTrain$numNoDiabetes+1):(lstPimaTrain$numNoDiabetes+lstPimaTrain$numDiabetes)],
bResult = TRUE # 糖尿病か否か?(FALSE:糖尿病でない、TRUE:糖尿病)
)
# sort class C1,C2 data
dfPimaTrain_C1$glu <- dfPimaTrain_C1$glu[ order(dfPimaTrain_C1$glu) ]
dfPimaTrain_C1$bmi <- dfPimaTrain_C1$bmi[ order(dfPimaTrain_C1$glu) ]
dfPimaTrain_C2$glu <- dfPimaTrain_C2$glu[ order(dfPimaTrain_C2$glu) ]
dfPimaTrain_C2$bmi <- dfPimaTrain_C2$bmi[ order(dfPimaTrain_C2$glu) ]
# release memory
rm(Pima.tr)
rm(Pima.te)
#-------------------
# set class C1 data
#-------------------
datU1_C1 <- mean( dfPimaTrain_C1$glu ) # クラス1(糖尿病発症なし)の変数1(glu)の平均値
datU2_C1 <- mean( dfPimaTrain_C1$bmi ) # クラス1(糖尿病発症有り)の変数2(BMI)の平均値
datU_C1 <- matrix( c(datU1_C1,datU2_C1), nrow = 2, ncol = 1) # クラス1(糖尿病発症なし)の平均ベクトル
matS_C1 <- matrix( c(0,0,0,0), nrow = 2, ncol = 2 ) # クラス1(糖尿病発症なし)の共分散行列
matS_C1[1,1] <- sqrt( var( dfPimaTrain_C1$glu, dfPimaTrain_C1$glu ) )
matS_C1[1,2] <- sqrt( var( dfPimaTrain_C1$glu, dfPimaTrain_C1$bmi ) )
matS_C1[2,1] <- sqrt( var( dfPimaTrain_C1$bmi, dfPimaTrain_C1$glu ) )
matS_C1[2,2] <- sqrt( var( dfPimaTrain_C1$bmi, dfPimaTrain_C1$bmi ) )
#-------------------
# set class C2 data
#-------------------
datU1_C2 <- mean( dfPimaTrain_C2$glu ) # クラス2(糖尿病発症なし)の変数1(glu)の平均値
datU2_C2 <- mean( dfPimaTrain_C2$bmi ) # クラス2(糖尿病発症有り)の変数2(BMI)の平均値
datU_C2 <- matrix( c(datU1_C2,datU2_C2), nrow = 2, ncol = 1) # クラス1(糖尿病発症なし)の平均ベクトル
matS_C2 <- matrix( c(0,0,0,0), nrow = 2, ncol = 2 ) # クラス2(糖尿病発症なし)の共分散行列
matS_C2[1,1] <- sqrt( var( dfPimaTrain_C2$glu, dfPimaTrain_C2$glu ) )
matS_C2[1,2] <- sqrt( var( dfPimaTrain_C2$glu, dfPimaTrain_C2$bmi ) )
matS_C2[2,1] <- sqrt( var( dfPimaTrain_C2$bmi, dfPimaTrain_C2$glu ) )
matS_C2[2,2] <- sqrt( var( dfPimaTrain_C2$bmi, dfPimaTrain_C2$bmi ) )
##################################
# secondary idification function
##################################
Dim2IdificationFunc <- function( x1, x2, u_C1, u_C2, matS1, matS2, P_C1=0.5, P_C2=0.5 )
{
datX <- matrix( 0, nrow = 2, ncol = 1 )
datX[1,] <- x1
datX[2,] <- x2
matW <- matrix( c(0,0,0,0), nrow = 2, ncol = 2 )
matW <- ( solve(matS1) - solve(matS2) )
vct <- ( t(u_C2)%*%solve(matS2) - t(u_C1)%*%solve(matS1) )
r <- ( t(u_C1)%*%solve(matS1)%*%u_C1 - t(u_C2)%*%solve(matS2)%*%u_C2 + log( det(matS1)/det(matS2) ) - 2*log(P_C1/P_C2) )
z0 <- ( t(datX)%*%matW%*%datX )
z1 <- ( 2*vct%*%datX )
z <- ( z0 + z1 + r )
return(z)
}
###############################
# liner idification function
###############################
Dim1IdificationFunc <- function( x1, x2, u_C1, u_C2, matS1, matS2, P_C1=0.5, P_C2=0.5 )
{
datX <- matrix( 0, nrow = 2, ncol = 1 )
datX[1,] <- x1
datX[2,] <- x2
matSp <- matrix( c(0,0,0,0), nrow = 2, ncol = 2 )
matSp <- P_C1*matS1 + P_C2*matS2
vct <- ( t(u_C2)%*%solve(matSp) - t(u_C1)%*%solve(matSp) )
r <- ( t(u_C1)%*%solve(matSp)%*%u_C1 - t(u_C2)%*%solve(matSp)%*%u_C2 + log( det(matSp)/det(matSp) ) - 2*log(P_C1/P_C2) )
z <- ( 2*vct%*%datX + r )
return(z)
}
#------------------------------
# set functions values
#-------------------------------
datX1 <- seq( from=0, to=200, by=5 ) # glu 軸の値ベクトル
datX2 <- seq( from=0, to=50, by=1 ) # BMI 軸の値ベクトル
datX1 <- as.matrix( datX1 )
datX2 <- as.matrix( datX2 )
matZDim2 <- matrix( 0, nrow=length(datX1), ncol=length(datX2) )
matZDim1 <- matrix( 0, nrow=length(datX1), ncol=length(datX2) )
# for loop matZDim2[i,j], matZDim1[i,j]
for( j in 1:length(datX2) )
{
for( i in 1:length(datX1) )
{
matZDim2[i,j] <- Dim2IdificationFunc(
x1 = datX1[i], x2 = datX2[j],
u_C1 = datU_C1, u_C2 = datU_C2, matS1 = matS_C1, matS2 = matS_C2
)
matZDim1[i,j] <- Dim1IdificationFunc(
x1 = datX1[i], x2 = datX2[j],
u_C1 = datU_C1, u_C2 = datU_C2, matS1 = matS_C1, matS2 = matS_C2 ,
P_C1 = lstPimaTrain$numNoDiabetes/(lstPimaTrain$numNoDiabetes+lstPimaTrain$numDiabetes),
P_C2 = lstPimaTrain$numNoDiabetes/(lstPimaTrain$numDiabetes+lstPimaTrain$numDiabetes)
)
}
}
############################
# ROC Curve #
############################
#-------------------------------------------------
# get error ratio from Normal distribution (2dim)
#-------------------------------------------------
datLow = seq( from = 0.0, to = 200, by = 1.0 ) # idefication-boundary <lower>
datUp = seq( from = 0.0, to = 200, by = 1.0 ) # idefication-boundary <upper>
dfROC2 <- data.frame(
sigma1 = matrix( 0, nrow = 10, ncol = 1 ),
sigma2 = matrix( 0, nrow = 10, ncol = 1 ),
sigma = matrix( 0, nrow = 10, ncol = 1 )
)
# pmvnormの動作確認
dfROC2$sigma1[1] <- pmvnorm( lower = -Inf, upper = +Inf, mean = as.vector(datU_C1), sigma = matS_C1 )
dfROC2$sigma1[2] <- pmvnorm( lower = -Inf, upper = datU1_C1, mean = as.vector(datU_C1), sigma = matS_C1 )
dfROC2$sigma1[3] <- pmvnorm( lower = -Inf, upper = datU1_C1+10, mean = as.vector(datU_C1), sigma = matS_C1 )
dfROC2$sigma1[4] <- pmvnorm( lower = datU1_C1, upper = +Inf, mean = as.vector(datU_C1), sigma = matS_C1 )
dfROC2$sigma2[1] <- pmvnorm( lower = -Inf, upper = c(0,0), mean = as.vector(datU_C2), sigma = matS_C2 )
dfROC2$sigma2[2] <- pmvnorm( lower = -Inf, upper = c(50,10), mean = as.vector(datU_C2), sigma = matS_C2 )
dfROC2$sigma2[3] <- pmvnorm( lower = -Inf, upper = c(100,30), mean = as.vector(datU_C2), sigma = matS_C2 )
dfROC2$sigma2[4] <- pmvnorm( lower = -Inf, upper = c(150,40), mean = as.vector(datU_C2), sigma = matS_C2 )
dfROC2$sigma <- dfROC2$sigma1 + dfROC2$sigma2
print( dfROC2 )
############################
# set graphics parameters #
############################
# 軸に関してのデータリスト
lstAxis <- list(
xMin = 0.0, xMax = 1.0, # x軸の最小値、最大値
yMin = 0.0, yMax = 1.0, # y軸の最小値、最大値
zMin = 0.0, zMax = 1.0, # z軸の最小値、最大値
xlim = range( c(0.0, 1.0) ),
ylim = range( c(0.0, 1.0) ),
zlim = range( c(0.0, 1.0) ),
mainTitle1 = "ROC曲線(2次元正規分布)\n2次識別関数[secondary idification function]", # 図のメインタイトル(図の上)
mainTitle2 = "ROC曲線(2次元正規分布)\n線形識別関数[liner idification function]", # 図のメインタイトル(図の上)
mainTitle3 = "ROC曲線(2次元正規分布)\n", # 図のメインタイトル(図の上)
subTitle1 = "2次識別関数[dim2 idification function]", # 図のサブタイトル(図の下)
subTitle2 = "線形識別関数[liner idification function]", # 図のサブタイトル(図の下)
subTitle3 = "2次識別関数+線形識別関数", # 図のサブタイトル(図の下)
xlab = "偽陽性率 [false positive rate]", # x軸の名前
ylab = "真陽性率 [true positive rate]", # y軸の名前
zlab = "z" # z軸の名前
)
lstAxis$xlim = range( c(lstAxis$xMin, lstAxis$xMax) )
lstAxis$ylim = range( c(lstAxis$yMin, lstAxis$yMax) )
lstAxis$zlim = range( c(lstAxis$zMin, lstAxis$zMax) )
#########################
# Draw figure #
#########################
# plot ROC Curve
plot(
dfROC2$sigma1, dfROC2$sigma2,
main = lstAxis$mainTitle1,
xlab = lstAxis$xlab, ylab = lstAxis$ylab,
xlim = lstAxis$xlim, ylim = lstAxis$ylim,
type = "p"
)
grid() # 図にグリッド線を追加強調太字テキスト