Reciprocal scaling with CCA

Code
# Paths
library(here)

# Multivariate analysis
library(ade4)
library(adegraphics)

# Matrix algebra
library(expm)

# Plots
library(CAnetwork)
Code
# Define bound for comparison to zero
zero <- 10e-10

Introduction

This is an extension of reciprocal scaling defined for correspondence analysis by Thioulouse and Chessel (1992) to canonical correspondence analysis.

Here, we start from two matrices:

  • A data matrix \(Y\) (\(r \times c\))
  • A matrix of predictor variables (environmental variabnes) \(E\) (\(r \times l\))

(r <- dim(Y)[1])
[1] 26
(c <- dim(Y)[2])
[1] 21
(l <- ncol(E))
[1] 6

Computation

TL;DR

We can compute reciprocal scaling score using CCorA and CCA scores.

From canonical correlation analysis We perform the CCorA of tables \(E_{infl}\) and \(Y_{infl}\). The two sets of scores give the correspondences in sites and species space.

From CCA We can show that\(scoreC = V\) (c1) and \(scoreR = Z2\) (l1).

From canonical correlation analysis

To perform the canonical correlation analysis, we compute the inflated tables \(R\) (\(n_{\bar{0}} \times l\)) and \(C\) (\(n_{\bar{0}} \times c\)) from \(Y\) (\(r \times c\)) and \(E\) (\(r \times l\)). The difference with CA is that we use \(E\) instead of \(Y\) to compute the inflated table \(R\). \(R\) is equivalent to \(E\) where rows of \(E\) are duplicated as many times as there are correspondences in \(Y\).

# Transform matrix to count table
P <- Y/sum(Y)
Pfreq <- as.data.frame(as.table(P))
colnames(Pfreq) <- c("row", "col", "Freq")

# Remove the cells with no observation
Pfreq0 <- Pfreq[-which(Pfreq$Freq == 0),]
Pfreq0$colind <- match(Pfreq0$col, colnames(Y)) # match index and species names
Pfreq0$col <- factor(Pfreq0$col)
Pfreq0$row <- factor(Pfreq0$row)

pi_ <- rowSums(P)
p_j <- colSums(P)

We take the frequency table defined before and use it to compute the inflated tables (with weights):

# Create indicator tables
tabR <- acm.disjonctif(as.data.frame(Pfreq0$row))
tabR <- as.matrix(tabR) %*% as.matrix(E) # duplicate rows of E according to the correspondences of Y
tabC <- acm.disjonctif(as.data.frame(Pfreq0$col))
colnames(tabC) <- colnames(Y)

# Get weights
wt <- Pfreq0$Freq

Below are the first lines of tables \(R\) and \(C\):

Code
knitr::kable(head(tabR, 30))
location elevation patch_area perc_forests perc_grasslands ShannonLandscapeDiv
0 10 6.28 7.7882 67.7785 0.232
0 30 7.92 16.4129 43.4066 0.274
0 430 83.24 24.4526 28.4995 0.274
0 420 140.83 41.9966 34.2412 0.260
0 400 140.83 41.9966 34.2412 0.260
0 500 0.50 7.5445 67.0780 0.240
0 470 7.84 19.3087 56.4031 0.253
0 110 21.15 23.4668 49.8908 0.262
0 460 24.82 21.3932 57.0430 0.243
0 450 3.11 4.8285 73.7518 0.216
0 40 6.66 7.4729 66.0061 0.240
0 160 5.63 32.3516 45.0477 0.253
0 160 10.99 29.7139 18.8270 0.291
1 10 6.28 7.7882 67.7785 0.232
1 30 7.92 16.4129 43.4066 0.274
1 430 83.24 24.4526 28.4995 0.274
1 420 140.83 41.9966 34.2412 0.260
1 400 140.83 41.9966 34.2412 0.260
1 500 0.50 7.5445 67.0780 0.240
1 470 7.84 19.3087 56.4031 0.253
1 110 21.15 23.4668 49.8908 0.262
1 460 24.82 21.3932 57.0430 0.243
1 450 3.11 4.8285 73.7518 0.216
1 40 6.66 7.4729 66.0061 0.240
1 160 5.63 32.3516 45.0477 0.253
1 160 10.99 29.7139 18.8270 0.291
0 160 10.99 29.7139 18.8270 0.291
1 500 0.50 7.5445 67.0780 0.240
1 460 24.82 21.3932 57.0430 0.243
1 450 3.11 4.8285 73.7518 0.216
Code
knitr::kable(head(tabC, 30))
sp1 sp2 sp3 sp4 sp5 sp6 sp7 sp8 sp9 sp10 sp11 sp12 sp13 sp14 sp15 sp16 sp17 sp18 sp19 sp21 sp22
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Then, we perform a canonical correlation on the scaled tables \(R_{scaled}\) and \(C_{scaled}\). We find the coefficients \(\rho\) and \(\gamma\) maximizing the correlation between the scores \(S_R = R_{scaled} \rho\) and \(S_C = C_{scaled} \gamma\).

# Center tables
tabR_scaled <- scalewt(tabR, wt, 
                       scale = FALSE)
tabC_scaled <- scalewt(tabC, wt, 
                       scale = FALSE)

res <- cancor(diag(sqrt(wt)) %*% tabR_scaled, 
              diag(sqrt(wt)) %*% tabC_scaled, 
              xcenter = FALSE, ycenter = FALSE)
# res gives the coefficients of the linear combinations that maximizes the correlation between the 2 dimensions
dim(res$xcoef) # l columns -> R_scaled is of full rank
[1] 6 6
dim(res$ycoef) # c-1 columns -> C_scaled is not of full rank
[1] 20 20
# Compute these scores from this coef
scoreR <- tabR_scaled[, 1:l]  %*% res$xcoef
scoreC <- tabC_scaled[, 1:(c-1)]  %*% res$ycoef
rownames(scoreC) <- Pfreq0$col
rownames(scoreR) <- Pfreq0$row

From CCA scores

neig <- min(c(r-1, c, l))
ca <- dudi.coa(Y, 
               nf = c-1, 
               scannf = FALSE)
cca <- pcaiv(dudi = ca, 
             df = E,
             scannf = FALSE,
             nf = neig)
sqrt(cca$eig)/res$cor
[1] 1 1 1 1 1 1

We check that unique scores correspond:

fac <- Pfreq0$col
s.class(scoreC,
        wt = wt,
        fac = fac,
        plabels.col = params$colspp, 
        main = "Unique C scores")

fac <- Pfreq0$row
s.class(scoreR,
        wt = wt,
        fac = fac,
        plabels.col = params$colsite, 
        main = "Unique R scores")

mrowsC <- meanfacwt(scoreC, fac = Pfreq0$col, wt = Pfreq0$Freq)
all(abs(abs(mrowsC/cca$c1) - 1) < zero)
[1] TRUE
mrowsR <- meanfacwt(scoreR, fac = Pfreq0$row, wt = Pfreq0$Freq)
all(abs(abs(mrowsR/cca$l1) - 1) < zero)
[1] TRUE

Conditional means and variances

TL;DR

For the scores computed with R scores and C scores, we have 3 types of formulas:

  • definition (weighted mean/variance/covariance)
  • formulas using the CCA scores

Here, we don’t show the definition.

Formulas using CCA scores

  • using R score
Rows (sites) Columns (species)
Mean for axis \(k\) \(m_k(i) = z2_{ik}\) ($l1) \(m_k(j) = v_{jk}\) ($c1)
Variance for axis \(k\) None \[s_{jk}^2 = \frac{1}{p_{\cdot j}} \sum_{i = 1}^r \left(p_{ij} {z2_{ik}}^2 - {v_{jk}}^2 \right)\]
Covariance between axes \(k\), \(l\)
  • using C score
Rows (sites) Columns (species)
Mean for axis \(k\) \(u^\star_{ik}\) (WA score ls) \[v_{jk}\] ($c1)
Variance for axis \(k\) \[s_k^2(i) = \frac{1}{p_{i \cdot}} \sum_{j = 1}^c \left(p_{ij} v_{jk} \right) - {u^\star_{ik}}^2\] None
Covariance between axes \(k\), \(l\)

For rows = sites (group per row)

In this section, we compute means and variances per row (for sites).

scoreC

Below is a graphical illustration of scoreC grouped by rows:

s.class(scoreC,
        wt = Pfreq0$Freq,
        fac = as.factor(Pfreq0$row),
        ppoints.col = params$colspp,
        plabels.col = params$colsite, 
        main = "With C score (points = species scores)")

We compute the following mean: \(m_k(i) = \frac{1}{y_{i\cdot}} \sum_{j = 1}^c p_{ij} \text{scoreC}_k(i,j)\). To compute these conditional means, we use the meanfacwt function from the ade4 package.

mrowsC <- meanfacwt(scoreC, fac = Pfreq0$row, wt = Pfreq0$Freq)

Then we compare this to a score obtained from CCA.

mrowsC/cca$ls
   Axis1 Axis2 Axis3 Axis4 Axis5 Axis6
1      1     1     1    -1     1     1
2      1     1     1    -1     1     1
3      1     1     1    -1     1     1
4      1     1     1    -1     1     1
5      1     1     1    -1     1     1
6      1     1     1    -1     1     1
7      1     1     1    -1     1     1
8      1     1     1    -1     1     1
9      1     1     1    -1     1     1
10     1     1     1    -1     1     1
11     1     1     1    -1     1     1
12     1     1     1    -1     1     1
13     1     1     1    -1     1     1
14     1     1     1    -1     1     1
15     1     1     1    -1     1     1
16     1     1     1    -1     1     1
17     1     1     1    -1     1     1
18     1     1     1    -1     1     1
19     1     1     1    -1     1     1
20     1     1     1    -1     1     1
21     1     1     1    -1     1     1
22     1     1     1    -1     1     1
23     1     1     1    -1     1     1
24     1     1     1    -1     1     1
25     1     1     1    -1     1     1
26     1     1     1    -1     1     1

??????? axis 4 ?????

Now let’s compute the variance of scoreC per row:

varrowsC <- varfacwt(scoreC, fac = Pfreq0$row, wt = Pfreq0$Freq)

We can use this relationship to find the formula using CCA scores:

varrowsC_CCA <- matrix(nrow = r, 
                       ncol = neig)

for (i in 1:r) {
  # Get CA scores for site i
  Li <- cca$ls[i, ]
  
  # Compute the part with the sum on Cj
  # we add all coordinates Cj^2 weighted by the number of observations on site i
  sumj <- t(P[i, ]) %*% as.matrix(cca$c1)^2
  # sumj <- t(Y[i, ]) %*% as.matrix(cca$co)^2

  # Fill i-th row with site i variance along each axis
  # varrowsC_CCA[i, ] <- 1/(n*lambda_CCA)*((1/pi_[i])*sumj - lambda_CCA*as.numeric(Li)^2)
  varrowsC_CCA[i, ] <- ((1/pi_[i])*sumj - as.numeric(Li)^2)
}
varrowsC[, 1:neig]/varrowsC_CCA
   X1 X2 X3 X4 X5 X6
1   1  1  1  1  1  1
2   1  1  1  1  1  1
3   1  1  1  1  1  1
4   1  1  1  1  1  1
5   1  1  1  1  1  1
6   1  1  1  1  1  1
7   1  1  1  1  1  1
8   1  1  1  1  1  1
9   1  1  1  1  1  1
10  1  1  1  1  1  1
11  1  1  1  1  1  1
12  1  1  1  1  1  1
13  1  1  1  1  1  1
14  1  1  1  1  1  1
15  1  1  1  1  1  1
16  1  1  1  1  1  1
17  1  1  1  1  1  1
18  1  1  1  1  1  1
19  1  1  1  1  1  1
20  1  1  1  1  1  1
21  1  1  1  1  1  1
22  1  1  1  1  1  1
23  1  1  1  1  1  1
24  1  1  1  1  1  1
25  1  1  1  1  1  1
26  1  1  1  1  1  1
varrowsC[, 1:neig]/varrowsC_CCA
   X1 X2 X3 X4 X5 X6
1   1  1  1  1  1  1
2   1  1  1  1  1  1
3   1  1  1  1  1  1
4   1  1  1  1  1  1
5   1  1  1  1  1  1
6   1  1  1  1  1  1
7   1  1  1  1  1  1
8   1  1  1  1  1  1
9   1  1  1  1  1  1
10  1  1  1  1  1  1
11  1  1  1  1  1  1
12  1  1  1  1  1  1
13  1  1  1  1  1  1
14  1  1  1  1  1  1
15  1  1  1  1  1  1
16  1  1  1  1  1  1
17  1  1  1  1  1  1
18  1  1  1  1  1  1
19  1  1  1  1  1  1
20  1  1  1  1  1  1
21  1  1  1  1  1  1
22  1  1  1  1  1  1
23  1  1  1  1  1  1
24  1  1  1  1  1  1
25  1  1  1  1  1  1
26  1  1  1  1  1  1

We can also compute the covariance between axes \(k\) and \(l\) for site \(i\) from CCorA scores:

\[c_{kl}(i) = \frac{1}{y_{i+}}\sum_{j=1}^c p_{ij}{s_Y}_k(i, j){s_Y}_l(i, j) - m_{ik}m_{il}\]

covarrowsC <- covfacwt(scoreC, fac = Pfreq0$row, wt = Pfreq0$Freq)

This is equivalent to the following formula using CCA scores:

\[ c_{kl}(i) = \frac{1}{n} \left( \sum_{j=1}^c p_{ij}v_{jk}v_{jl} - u^\star_{ik}u^\star_{il} \right)\]

# Define axes k and l
kk <- 1
lk <- 2

Below, we compute the covariance for all resources between axes $k=$1 and $l = $2 and compare to the covariance computed from the weighted covariance of CCorA scores:

# Initialize vector
covarrowsC_CCA <- vector(mode = "numeric", length = r)

for (i in 1:r) {
  # Compute the sum
  sumj <- sum(P[i, ]*cca$c1[, kk]*cca$c1[, lk])

  # Covariance for species i
  covarrowsC_CCA[i] <- ((1/pi_[i])*sumj - cca$ls[i, kk]*cca$ls[i, lk])
}
covarrowsC_kl <- sapply(covarrowsC, function(x) x[kk, lk])
res <- covarrowsC_CCA/covarrowsC_kl
all(abs(abs(res) - 1) < zero)
[1] TRUE
scoreR

Now, we compute the mean per row (site) from the row (sites) scores.

Below is a graphical illustration of scoreR grouped by rows:

s.class(scoreR,
        wt = Pfreq0$Freq,
        fac = as.factor(Pfreq0$row),
        ppoints.col = params$colsite,
        plabels.col = params$colsite, 
        main = "With R score (points = sites scores)")

All points of the same site ares superimposed.

mrowsR <- meanfacwt(scoreR, fac = Pfreq0$row, wt = Pfreq0$Freq)

mrowsR/cca$l1
   RS1 RS2 RS3 RS4 RS5 RS6
1    1   1   1  -1   1   1
2    1   1   1  -1   1   1
3    1   1   1  -1   1   1
4    1   1   1  -1   1   1
5    1   1   1  -1   1   1
6    1   1   1  -1   1   1
7    1   1   1  -1   1   1
8    1   1   1  -1   1   1
9    1   1   1  -1   1   1
10   1   1   1  -1   1   1
11   1   1   1  -1   1   1
12   1   1   1  -1   1   1
13   1   1   1  -1   1   1
14   1   1   1  -1   1   1
15   1   1   1  -1   1   1
16   1   1   1  -1   1   1
17   1   1   1  -1   1   1
18   1   1   1  -1   1   1
19   1   1   1  -1   1   1
20   1   1   1  -1   1   1
21   1   1   1  -1   1   1
22   1   1   1  -1   1   1
23   1   1   1  -1   1   1
24   1   1   1  -1   1   1
25   1   1   1  -1   1   1
26   1   1   1  -1   1   1

The variance is null.

For columns = species (group per column)

scoreC

We group the scoreC per species. Below is an illustration:

s.class(scoreC,
        wt = Pfreq0$Freq,
        fac = as.factor(Pfreq0$colind),
        lab = colnames(Y),
        ppoints.col = params$colspp,
        plabels.col = params$colspp, 
        main = "With C score (points = species scores)")

mcolsC <- meanfacwt(scoreC, fac = Pfreq0$colind, wt = Pfreq0$Freq)

mcolsC/cca$c1
     CS1 CS2 CS3 CS4 CS5 CS6
sp1    1   1   1  -1   1   1
sp2    1   1   1  -1   1   1
sp3    1   1   1  -1   1   1
sp4    1   1   1  -1   1   1
sp5    1   1   1  -1   1   1
sp6    1   1   1  -1   1   1
sp7    1   1   1  -1   1   1
sp8    1   1   1  -1   1   1
sp9    1   1   1  -1   1   1
sp10   1   1   1  -1   1   1
sp11   1   1   1  -1   1   1
sp12   1   1   1  -1   1   1
sp13   1   1   1  -1   1   1
sp14   1   1   1  -1   1   1
sp15   1   1   1  -1   1   1
sp16   1   1   1  -1   1   1
sp17   1   1   1  -1   1   1
sp18   1   1   1  -1   1   1
sp19   1   1   1  -1   1   1
sp21   1   1   1  -1   1   1
sp22   1   1   1  -1   1   1

The variance is null.

scoreR

Below are the scoreR grouped by column:

s.class(scoreR,
        wt = Pfreq0$Freq,
        fac = as.factor(Pfreq0$colind),
        lab = colnames(Y),
        ppoints.col = params$colsite,
        plabels.col = params$colspp, 
        main = "With R score (points = sites scores)")

mcolsR <- meanfacwt(scoreR, fac = Pfreq0$colind, wt = Pfreq0$Freq)

mcolsR/cca$co
     Comp1 Comp2 Comp3 Comp4 Comp5 Comp6
sp1      1     1     1    -1     1     1
sp2      1     1     1    -1     1     1
sp3      1     1     1    -1     1     1
sp4      1     1     1    -1     1     1
sp5      1     1     1    -1     1     1
sp6      1     1     1    -1     1     1
sp7      1     1     1    -1     1     1
sp8      1     1     1    -1     1     1
sp9      1     1     1    -1     1     1
sp10     1     1     1    -1     1     1
sp11     1     1     1    -1     1     1
sp12     1     1     1    -1     1     1
sp13     1     1     1    -1     1     1
sp14     1     1     1    -1     1     1
sp15     1     1     1    -1     1     1
sp16     1     1     1    -1     1     1
sp17     1     1     1    -1     1     1
sp18     1     1     1    -1     1     1
sp19     1     1     1    -1     1     1
sp21     1     1     1    -1     1     1
sp22     1     1     1    -1     1     1
varcolsR <- varfacwt(scoreR, fac = Pfreq0$colind, wt = Pfreq0$Freq)

Similarly we can use this relationship to compute the variance directly from the CCA scores (see in table).

We can also compute the covariance between axes \(k\) and \(l\) for species \(j\) from CCorA scores:

\[c_{kl}(j) = \frac{1}{y_{+j}}\sum_{i=1}^r p_{ij}{s_R}_k(i, j){s_R}_l(i, j) - m_{jk}m_{jl}\]

covarcolsR <- covfacwt(scoreR, fac = Pfreq0$col, wt = Pfreq0$Freq)

This is equivalent to the following formula using CCA scores:

\[c_{kl}(j) = \frac{1}{n} \left( \sum_{i=1}^r p_{ij}z2_{ik}z2_{il} - v^\star_{jk}v^\star_{jl} \right)\]

# Define axes k and l
kk <- 1
lk <- 2

Below, we compute the covariance for all resources between axes $k=$1 and $l = $2 and compare to the covariance computed from the weighted covariance of CCorA scores:

# Initialize vector
covarcolsR_CCA <- vector(mode = "numeric", length = c)

for (j in 1:c) {
  # Compute the sum
  sumi <- sum(P[, j]*cca$l1[, kk]*cca$l1[, lk])

  # Covariance for species i
  covarcolsR_CCA[j] <- ((1/p_j[j])*sumi - cca$co[j, kk]*cca$co[j, lk])
}
# Species that appear only once in a site
sp1 <- which(colnames(Y) %in% c("sp7", "sp13"))

covarcolsR_kl <- sapply(covarcolsR, function(x) x[kk, lk])
res <- (covarcolsR_CCA/covarcolsR_kl)[-sp1]
all(abs(abs(res) - 1) < zero)
[1] TRUE

Non-reciprocal score

We can remain in the 2 biplots showing \(V\) with \(U^\star\) (c1 and ls) (scaling type 1 = rows niches) and \(U\) and \(V^\star\) (co and l1) (scaling type 2 = column niches).

\(H_1\) (scaling type 1, rows niches)

To do that, we define the “CA-correspondences” \(h1_k(i, j)\) for scaling type 1:

\[h1_k(i, j) = \frac{u^\star_k(i) + v_k(j)}{2}\]

Code
# Initialize results matrix
H1 <- matrix(nrow = nrow(Pfreq0), 
             ncol = neig)

for (k in 1:neig) { # For each axis
  ind <- 1 # initialize row index
  for (obs in 1:nrow(Pfreq0)) { # For each observation
    i <- Pfreq0$row[obs]
    j <- Pfreq0$col[obs]
    H1[ind, k] <- (cca$ls[i, k] + cca$c1[j, k])/2
    ind <- ind + 1
  }
}
Code
corresp <- paste(Pfreq0$row, Pfreq0$col, sep = "-")
multiplot(indiv_row = as.data.frame(H1), 
          indiv_row_lab = corresp,
          row_color = "black")

# Groupwise mean
mrows_nr <- meanfacwt(H1, fac = Pfreq0$row, wt = Pfreq0$Freq)
all(abs(abs(mrows_nr/cca$ls) - 1) < zero)
[1] TRUE
# Groupwise variance ---
# Compare to RC score
varrows_nr <- varfacwt(H1, fac = Pfreq0$row, wt = Pfreq0$Freq)
# Compare to mean of scoreC
res <- 4*varrows_nr  # Factor 4
all(abs(abs(res/varrowsC[, 1:neig]) - 1) < zero)
[1] TRUE
# Groupwise covariance
covarrows_nr <- covfacwt(H1, fac = Pfreq0$row, wt = Pfreq0$Freq)
res <- lapply(covarrows_nr, function(x) x*4)  # Factor 4
# We check for the first covariance
all(abs(abs(res[[1]]/covarrowsC[[1]][1:l, 1:l]) - 1) < zero)
[1] TRUE
Code
s.class(H1, 
        fac = Pfreq0$row, plabels.col = params$colsite, 
        wt = Pfreq0$Freq,
        main = "Non-reciprocal scaling")

\(h1_k(i, j)\) can also be defined from the canonical correlation scores:

\[h1_k(i, j) = \frac{m_{ik} + {s_Y}_k(i, j)}{2}\]

tabR <- as.matrix(acm.disjonctif(as.data.frame(Pfreq0$row)))
scoreRls <- tabR %*% mrowsC # Duplicate means according to correspondences of R

# Find back the formula from the scores 
H1_cancor <- (scoreRls[, 1:l] + scoreC[, 1:l])/2

all(abs(abs(H1_cancor/H1) - 1) < zero)
[1] TRUE

\(H_2\) (scaling type 2, columns niches)

We define the “CA-correspondences” \(h2_k(i, j)\) for scaling type 2:

\[h2_k(i, j) = \frac{z2_k(i) + v^\star_k(j)}{2}\]

Code
# Initialize results matrix
H2 <- matrix(nrow = nrow(Pfreq0), 
             ncol = neig)

for (k in 1:neig) { # For each axis
  ind <- 1 # initialize row index
  for (obs in 1:nrow(Pfreq0)) { # For each observation
    i <- Pfreq0$row[obs]
    j <- Pfreq0$col[obs]
    H2[ind, k] <- (cca$l1[i, k] + cca$co[j, k])/2
    ind <- ind + 1
  }
}
Code
corresp <- paste(Pfreq0$row, Pfreq0$col, sep = "-")
multiplot(indiv_row = as.data.frame(H2), 
          indiv_row_lab = corresp,
          row_color = "black")

# Groupwise mean ---
mcols_nr <- meanfacwt(H2, fac = Pfreq0$col, wt = Pfreq0$Freq)
# Compare with CCA score
all(abs(abs(mcols_nr/cca$co) - 1) < zero) 
[1] TRUE
# Groupwise variance ---
varcols_nr <- varfacwt(H2, fac = Pfreq0$col, wt = Pfreq0$Freq)
Code
fac_col <- factor(Pfreq0$col)
s.class(H2, 
        fac = fac_col, plabels.col = params$colspp, 
        wt = Pfreq0$Freq,
        main = "Non-reciprocal scaling")

\(h2_k(i, j)\) can also be defined from the canonical correlation scores:

\[h2_k(i, j) = \frac{ {s_R}_k(i, j) + {s_Y}_k(i, j) \sqrt{\lambda_k} }{2}\]

# Find back the formula from the scores 
H2_cancor <- (scoreR[, 1:neig] + scoreC[, 1:neig] %*% diag(cca$eig^(1/2)))/2
all(abs(abs(H2_cancor/H2) - 1) < zero)
[1] TRUE

References

Thioulouse, Jean, and Daniel Chessel. 1992. “A Method for Reciprocal Scaling of Species Tolerance and Sample Diversity: Ecological Archives.” Ecology 73 (2): 670–80. https://doi.org/10.2307/1940773.