2 Nonparametric Statistics

2.1 Correlation

# Example data
# Two paired numeric vectors, X and Y, with 11 observations each
X <- c(
  2, 1, 6, 11, 7, 11,
  1, 12, 13, 13, 11
)
Y <- c(
  9, 8, 16, 13, 11, 12,
  7, 7, 13, 17, 10
)

# Combine both vectors into one data frame (columns are named X and Y)
dataku <- data.frame(X, Y)

# Show the data
dataku

# Kendall's tau rank correlation between X and Y
cor.test(
  dataku$X,
  dataku$Y,
  method = "kendall"
)


    Kendall's rank correlation tau

data:  dataku$X and dataku$Y
z = 1.7529, p-value = 0.07962
alternative hypothesis: true tau is not equal to 0
sample estimates:
      tau 
0.4273658

# Spearman's rho rank correlation between the same two columns
cor.test(
  dataku$X,
  dataku$Y,
  method = "spearman"
)


    Spearman's rank correlation rho

data:  dataku$X and dataku$Y
S = 108.98, p-value = 0.1134
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.5046513

2.1.1 Chi-Square Test

# Example data
# Original data as a cross-tabulation: rows are employment status,
# columns are productivity level, cells are observed frequencies
frekuensi <- matrix(c(30, 21, 30, 19, 15, 35), nrow = 2)
rownames(frekuensi) <- c("Kontrak", "Tetap")
colnames(frekuensi) <- c("Rendah", "Sedang", "Tinggi")

# Expand the table into one observation per row. This is done with vectorised
# rep() calls instead of appending to vectors inside nested loops, which
# copies the whole vector on every iteration.
# Cells are emitted row by row (every column of the first status, then every
# column of the next), so t() is used to read the counts in row-major order.
frekuensi_per_sel <- as.vector(t(frekuensi))
Status_Pegawai <- rep(
  rep(rownames(frekuensi), each = ncol(frekuensi)),
  times = frekuensi_per_sel
)
Tingkat_Produktivitas <- rep(
  rep(colnames(frekuensi), times = nrow(frekuensi)),
  times = frekuensi_per_sel
)

# Build the data frame
dataku2 <- data.frame(Status_Pegawai, Tingkat_Produktivitas)

# Show the first rows
head(dataku2)

  Status_Pegawai Tingkat_Produktivitas
1        Kontrak                Rendah
2        Kontrak                Rendah
3        Kontrak                Rendah
4        Kontrak                Rendah
5        Kontrak                Rendah
6        Kontrak                Rendah

# Convert both columns to factors
dataku2 <- transform(
  dataku2,
  Status_Pegawai = as.factor(Status_Pegawai),
  Tingkat_Produktivitas = as.factor(Tingkat_Produktivitas)
)

# Per-level counts for each factor column
summary(dataku2)

 Status_Pegawai Tingkat_Produktivitas
 Kontrak:75     Rendah:51            
 Tetap  :75     Sedang:49            
                Tinggi:50

# Build the contingency table: employment status (rows) by
# productivity level (columns)
dataku2_kt <- table(
  dataku2[["Status_Pegawai"]],
  dataku2[["Tingkat_Produktivitas"]]
)
dataku2_kt

         
          Rendah Sedang Tinggi
  Kontrak     30     30     15
  Tetap       21     19     35

# Pearson's chi-squared test on the contingency table
chisq.test(x = dataku2_kt)


    Pearson's Chi-squared test

data:  dataku2_kt
X-squared = 12.058, df = 2, p-value = 0.002408

2.2 Difference Test

2.2.1 Two sample test (Independent)

2.2.1.1 Mann-Whitney Test

# Example data
# Efficiency measurements for the large-scale and the small-scale group
efisiensi_besar <- c(
  1.31, 1.25, 1.32, 1.3, 1.33, 1.31,
  1.35, 1.34, 0.28, 1.34, 1.28
)
efisiensi_kecil <- c(
  1.21, 1.28, 1.32, 1.25, 1.27,
  1.31, 1.26, 1.31, 1.24, 1.22
)

# Mann-Whitney (Wilcoxon rank sum) test for two independent samples
wilcox.test(x = efisiensi_besar, y = efisiensi_kecil)


    Wilcoxon rank sum test with continuity correction

data:  efisiensi_besar and efisiensi_kecil
W = 82.5, p-value = 0.05614
alternative hypothesis: true location shift is not equal to 0

2.2.1.2 Chi-Square Test

2.2.2 More than two sample test (Independent)

2.2.2.1 Kruskal-Wallis Test

# Example data
# One numeric vector per industry (A, B, and C)
industri_A <- c(2.33, 2.79, 3.01, 2.33, 1.22, 2.79, 1.9, 1.65)
industri_B <- c(2.33, 2.33, 2.79, 3.01, 1.99, 2.45)
industri_C <- c(1.06, 1.37, 1.09, 1.65, 1.44, 1.11)

# Stack all values into a single vector
nilai <- c(industri_A, industri_B, industri_C)

# Group label for every value, in the same order as `nilai`
industri <- rep(
  c("Industri A", "Industri B", "Industri C"),
  times = c(length(industri_A), length(industri_B), length(industri_C))
)

# Build the data frame, storing the group label as a factor
dataku4 <- data.frame(industri = as.factor(industri), nilai)

# Show the data frame
dataku4

     industri nilai
1  Industri A  2.33
2  Industri A  2.79
3  Industri A  3.01
4  Industri A  2.33
5  Industri A  1.22
6  Industri A  2.79
7  Industri A  1.90
8  Industri A  1.65
9  Industri B  2.33
10 Industri B  2.33
11 Industri B  2.79
12 Industri B  3.01
13 Industri B  1.99
14 Industri B  2.45
15 Industri C  1.06
16 Industri C  1.37
17 Industri C  1.09
18 Industri C  1.65
19 Industri C  1.44
20 Industri C  1.11

# Kruskal-Wallis rank sum test of `nilai` across the `industri` groups
kruskal.test(
  nilai ~ industri,
  data = dataku4
)


    Kruskal-Wallis rank sum test

data:  nilai by industri
Kruskal-Wallis chi-squared = 10.619, df = 2, p-value = 0.004943

# Post hoc analysis after Kruskal-Wallis: Dunn's test for pairwise comparisons
# install.packages("FSA")
library(FSA)
# The Holm adjustment is dunnTest()'s default; it is spelled out here so the
# P.adj column is unambiguous.
dunnTest(
  nilai ~ industri,
  data = dataku4,
  method = "holm"
)

               Comparison          Z     P.unadj      P.adj
1 Industri A - Industri B -0.6428883 0.520296550 0.52029655
2 Industri A - Industri C  2.6109139 0.009030062 0.01806012
3 Industri B - Industri C  3.0436533 0.002337243 0.00701173

2.2.2.2 Chi-Square Test

2.2.3 Two sample test (Dependent)

2.2.3.1 Sign Test

# Example data
# Satisfaction scores for the old and the new product, one pair per respondent
produk_lama <- c(16, 15, 18, 16, 17, 18, 20, 15, 14, 16, 19, 17)
produk_baru <- c(18, 17, 16, 19, 17, 20, 18, 16, 15, 18, 20, 18)

# Respondent IDs
responden <- 1:12

# Long-format data frame: all old-product rows first, then all new-product
# rows, each group in respondent order
dataku5 <- data.frame(
  Responden = rep(responden, times = 2),
  Produk = factor(rep(
    c("Produk Lama", "Produk Baru"),
    times = c(length(produk_lama), length(produk_baru))
  )),
  Skor_Kepuasan = c(produk_lama, produk_baru)
)

# Show the data frame
dataku5

   Responden      Produk Skor_Kepuasan
1          1 Produk Lama            16
2          2 Produk Lama            15
3          3 Produk Lama            18
4          4 Produk Lama            16
5          5 Produk Lama            17
6          6 Produk Lama            18
7          7 Produk Lama            20
8          8 Produk Lama            15
9          9 Produk Lama            14
10        10 Produk Lama            16
11        11 Produk Lama            19
12        12 Produk Lama            17
13         1 Produk Baru            18
14         2 Produk Baru            17
15         3 Produk Baru            16
16         4 Produk Baru            19
17         5 Produk Baru            17
18         6 Produk Baru            20
19         7 Produk Baru            18
20         8 Produk Baru            16
21         9 Produk Baru            15
22        10 Produk Baru            18
23        11 Produk Baru            20
24        12 Produk Baru            18

# Paired differences (new product minus old product).
# The pairing relies on both product groups being stored in the same
# respondent order in dataku5.
diff <- dataku5$Skor_Kepuasan[dataku5$Produk == "Produk Baru"] -
  dataku5$Skor_Kepuasan[dataku5$Produk == "Produk Lama"]

# Number of positive differences
jumlah_positif <- sum(diff > 0)

# The sign test only uses pairs that actually differ: a zero difference has
# no sign, so tied pairs are dropped from the number of trials instead of
# being counted as "not positive". Using length(diff) here would include
# ties and bias the test towards the null hypothesis.
jumlah_tanpa_seri <- sum(diff != 0)

# Sign test: exact binomial test of the positive count against p = 0.5.
# Any result obtained with length(diff) as the number of trials no longer
# applies when ties are present.
binom.test(
  jumlah_positif,
  jumlah_tanpa_seri,
  p = 0.5,
  alternative = "two.sided"
)


    Exact binomial test

data:  jumlah_positif and length(diff)
number of successes = 9, number of trials = 12, p-value = 0.146
alternative hypothesis: true probability of success is not equal to 0.5
95 percent confidence interval:
 0.4281415 0.9451394
sample estimates:
probability of success 
                  0.75

For guidance on interpreting the sign test output, see: https://www.geeksforgeeks.org/sign-test-in-r/

2.2.4 More than two sample test (Dependent)

2.2.4.1 Friedman Test

# Example data: one row per person, one column per drug
dataku6 <- matrix(
  c(
    1.24, 1.50, 1.62,
    1.71, 1.85, 2.05,
    1.37, 2.12, 1.68,
    2.53, 1.87, 2.62,
    1.23, 1.34, 1.51,
    1.94, 2.33, 2.86,
    1.72, 1.43, 2.86
  ),
  ncol = 3,
  byrow = TRUE,
  dimnames = list(
    Person = as.character(seq_len(7)),
    Obat = c("Obat A", "Obat B", "Obat C")
  )
)
dataku6

      Obat
Person Obat A Obat B Obat C
     1   1.24   1.50   1.62
     2   1.71   1.85   2.05
     3   1.37   2.12   1.68
     4   2.53   1.87   2.62
     5   1.23   1.34   1.51
     6   1.94   2.33   2.86
     7   1.72   1.43   2.86

# Friedman rank sum test on the person-by-drug matrix
friedman.test(y = dataku6)


    Friedman rank sum test

data:  dataku6
Friedman chi-squared = 8.8571, df = 2, p-value = 0.01193

2.2.4.2 Cochran Test

# Example data
## Input: a "Ya"/"Tidak" answer per respondent for each of three products
responden <- 1:8
produk_A <- c("Tidak", "Tidak", "Ya", "Ya", "Ya", "Tidak", "Tidak", "Tidak")
produk_B <- c("Tidak", "Ya", "Ya", "Ya", "Tidak", "Tidak", "Ya", "Tidak")
produk_C <- c("Ya", "Tidak", "Tidak", "Ya", "Tidak", "Ya", "Ya", "Tidak")

# Build the data frame; the character answer columns are stored as factors,
# while the integer respondent ID is left as is
dataku7 <- data.frame(
  responden, produk_A, produk_B, produk_C,
  stringsAsFactors = TRUE
)
dataku7

  responden produk_A produk_B produk_C
1         1    Tidak    Tidak       Ya
2         2    Tidak       Ya    Tidak
3         3       Ya       Ya    Tidak
4         4       Ya       Ya       Ya
5         5       Ya    Tidak    Tidak
6         6    Tidak    Tidak       Ya
7         7    Tidak       Ya       Ya
8         8    Tidak    Tidak    Tidak

# Recode the answers to binary: "Ya" becomes 1, anything else becomes 0.
# The comparison runs over every column, so dataku7 is replaced by a numeric
# matrix in which the responden column is 0 as well; that first column is
# dropped before the test below.
dataku7 <- ifelse(dataku7 == "Ya", yes = 1, no = 0)

# install.packages("nonpar")
library(nonpar)

# Cochran's Q test on the three binary product columns
cochrans.q(
  as.matrix(dataku7[, -1]),
  alpha = 0.05
)


 Cochran's Q Test 
 
 H0: There is no difference in the effectiveness of treatments. 
 HA: There is a difference in the effectiveness of treatments. 
 
 Q = 0.333333333333333 
 
 Degrees of Freedom = 2 
 
 Significance Level = 0.05 
 The p-value is  0.846481724890614