#    Mixed Effects Models and Extensions in Ecology with R (2009)
#    Zuur, Ieno, Walker, Saveliev and Smith.    Springer
#    This file was produced by Alain Zuur (highstat@highstat.com)
#    www.highstat.com

#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.


## Datos de bioluminiscencia (source) según estación de muestreo
require(RCurl)
datos=getURL("https://dl.dropboxusercontent.com/u/7610774/Datos/zuur/ISIT.txt")
ISIT=read.csv(textConnection(datos),sep="\t")
head(ISIT)
ISIT$fStation=factor(ISIT$Station)
library(lattice)
xyplot(Sources~SampleDepth|fStation, data=ISIT, type=c("p"))

## Seleccionamos una estación y le ajustamos un modelo gam
library(mgcv)
op <- par(mfrow = c(2, 2), mar = c(5, 4, 1, 2))
Sources16 <- ISIT$Sources[ISIT$Station == 16]
Depth16 <- ISIT$SampleDepth[ISIT$Station == 16]

plot(Depth16, Sources16, type = "p")

## La librería mgcv utiliza splines s(x) como funciones para el modelo aditivo
## fx=F, k=-1 indica utilizar validación cruzada para estimar la cantidad de suavizamiento
## bs="cr" indica la utilización de splines cúbicos
M3 <- gam(Sources16 ~ s(Depth16,fx=F,k=-1,bs="cr"))
plot(M3, se = TRUE)
M3pred <- predict(M3, se = TRUE, type = "response")
plot(Depth16, Sources16, type = "p")
I1 <- order(Depth16)
lines(Depth16[I1], M3pred$fit[I1], lty=1)
lines(Depth16[I1], M3pred$fit[I1]+2*M3pred$se[I1],lty=2)
lines(Depth16[I1], M3pred$fit[I1]-2*M3pred$se[I1],lty=2)


#Residuos

E2 <- resid(M3)
F2 <- fitted(M3)
par(mfrow=c(2,2), mar=c(5,4,2,2))
plot(x = F2, y=E2, xlab="Fitted values", ylab="Residuals")
plot(x = Depth16, y=E2, xlab="Depth", ylab="Residuals")
hist(E2,main="", xlab="Residuals")

summary(M3)

##########################################################################################
## ¿Como funciona el ajuste mediante splines?
##########################################################################################

## Algunos splines elementales (piezas de puzzle para ajustarse a funciones arbitrarias)
## -------------------------------------------------------------------------------------
library(lattice)
x<-seq(0,1,length=25)

co<-matrix(nrow=6,ncol=4)
co[1:6,1:4]<-rnorm(mean=0,sd=5,24)
f1<-co[1,1]+co[1,2]*x+co[1,3]*x^2+co[1,4]*x^3
f2<-co[2,1]+co[2,2]*x+co[2,3]*x^2+co[2,4]*x^3
f3<-co[3,1]+co[3,2]*x+co[3,3]*x^2+co[3,4]*x^3
f4<-co[4,1]+co[4,2]*x+co[4,3]*x^2+co[4,4]*x^3
f5<-co[5,1]+co[5,2]*x+co[5,3]*x^2+co[5,4]*x^3
f6<-co[6,1]+co[6,2]*x+co[6,3]*x^2+co[6,4]*x^3
xall<-rep(x,6)
ID<-rep(c(1,2,3,4,5,6),each=25)
f<-c(f1,f2,f3,f4,f5,f6)
xyplot(f~xall|factor(ID),col=1,type="l",xlab="X values",ylab="Function f")


## Encajando todas las piezas del puzzle en un caso particular
## -------------------------------------------------------------

Sources<-ISIT$Sources[ISIT$Station==16]
Depth<-ISIT$SampleDepth[ISIT$Station==16]

Range<-max(Depth)-min(Depth)
Bins<-Range/4

B1<-min(Depth)+Bins
B2<-min(Depth)+2*Bins
B3<-min(Depth)+3*Bins

plot(Depth,Sources)
abline(v=B1,lty=2)
abline(v=B2,lty=2)
abline(v=B3,lty=2)

S1<-Sources[Depth<=B1]
D1<-Depth[Depth<B1]
tmp<-lm(S1~D1+I(D1^2) +I(D1^3))
F1<-fitted(tmp)
I1<-order(D1)
lines(D1[I1],F1[I1])

S1<-Sources[Depth > B1 & Depth<=B2]
D1<-Depth[Depth > B1 & Depth<=B2]
tmp<-lm(S1~D1+I(D1^2) +I(D1^3))
F1<-fitted(tmp)
I1<-order(D1)
lines(D1[I1],F1[I1])

S1<-Sources[Depth > B2 & Depth<=B3]
D1<-Depth[Depth > B2 & Depth<=B3]
tmp<-lm(S1~D1+I(D1^2) +I(D1^3))
F1<-fitted(tmp)
I1<-order(D1)
lines(D1[I1],F1[I1])

S1<-Sources[Depth > B3 ]
D1<-Depth[Depth > B3 ]
tmp<-lm(S1~D1+I(D1^2) +I(D1^3))
F1<-fitted(tmp)
I1<-order(D1)
lines(D1[I1],F1[I1])


## Efecto del número de nodos
## ---------------------------

Sources19<-ISIT$Sources[ISIT$Station==19]
Depth19<-ISIT$SampleDepth[ISIT$Station==19]

Depth01<-Depth19
Depth01<-Depth01-min(Depth01)
Depth01<-Depth01/max(Depth01)
I<-order(Depth01)
Depth01<-Depth01[I]
Sources19<-Sources19[I]

rk<-function(x,z){
  ((z-0.5)^2-1/12)*((x-0.5)^2-1/12)/4 -((abs(x-z)-0.5)^4 -0.5*(abs(x-z)-0.5)^2 +7/240)/24
}

spl.X<-function(x,xk){
  q<-length(xk)+2
  n<-length(x)
  X<-matrix(1,n,q)
  X[,2]<-x
  X[,3:q]<-outer(x,xk,FUN=rk)
  X
}

YALL<-vector(length=100*8)
XALL<-vector(length=100*8)
IDALL<-vector(length=100*8)
a1<-1
a2<-100

XkALL<-0
IDxk<-0
for (knots in 1: 8){
  xk<-1:knots/(knots+1)
  XkALL<-c(XkALL,xk)
  IDxk<-c(IDxk,rep(knots,length(xk)))
  
  X<-spl.X(Depth01,xk)
  tmp1<-lm(Sources19~X-1)
  xp<-1:100/100
  Xp<-spl.X(xp,xk)
  
  plot(Depth01,Sources19)
  lines(xp,Xp%*%coef(tmp1))
  YALL[a1:a2]<-Xp%*%coef(tmp1)
  XALL[a1:a2]<-xp
  IDALL[a1:a2]<-knots
  a1<-a1+100
  a2<-a2+100
  
  n<-length(xk)
  for (i in 1:n){
    abline(v=xk[i],lty=2)
  }
}
XkALL<-XkALL[-1]
IDxk<-IDxk[-1]

require(lattice)

IDALL2<-IDALL+2
xyplot(YALL~XALL|factor(IDALL2),type="l",col=1,xlab="Depth (scaled between 0 and 1)",
       ylab="Sources",a1=1,
       panel=function(x,y,subscripts,...){
         panel.lines(x,y,col=1,lwd=2)
         panel.points(Depth01,Sources19,col=1,cex=0.5)#
         a<-IDALL2[subscripts]
         if (a[1] == 3){for (i in 1:1) {panel.abline(v=XkALL[i],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} }
         if (a[1] == 4){for (i in 1:2) {panel.abline(v=XkALL[i+1],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} }
         if (a[1] == 5){for (i in 1:3) {panel.abline(v=XkALL[i+1+2],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} }
         if (a[1] == 6){for (i in 1:4) {panel.abline(v=XkALL[i+1+2+3],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} }
         if (a[1] == 7){for (i in 1:5) {panel.abline(v=XkALL[i+1+2+3+4],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} }
         if (a[1] == 8){for (i in 1:6) {panel.abline(v=XkALL[i+1+2+3+4+5],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} }
         if (a[1] == 9){for (i in 1:7) {panel.abline(v=XkALL[i+1+2+3+4+5+6],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} }
         if (a[1] == 10){for (i in 1:8) {panel.abline(v=XkALL[i+1+2+3+4+5+6+7],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} }
       }
)

## Selección de lambda (nivel óptimo de suavizamiento)
## ---------------------------------------------------
#Figure 3.10
#Show penalized spline in action
spl.S<-function(xk){
  q<-length(xk)+2
  S<-matrix(0,q,q)
  S[3:q,3:q]<-outer(xk,xk,FUN=rk)
  S
}

mat.sqrt<-function(S){
  d<-eigen(S,symmetric=TRUE)
  d$values[d$values<0]<-0
  rS<-d$vectors%*%diag(d$values^0.5)%*%t(d$vectors)
  rS
}

prs.fit<-function(y,x,xk,lambda){
  print(lambda)
  q<-length(xk)+2
  n<-length(x)
  Xa<-rbind(spl.X(x,xk),mat.sqrt(spl.S(xk))*sqrt(lambda))
  y[(n+1):(n+q)]<-0
  lm(y~Xa-1)
}

xk<-1:7/8
xp<-1:100/100

lambda<-1e-6

#Graph showing the effect of lambda
YALL<-vector(length=100*8)
XALL<-vector(length=100*8)
IDALL<-vector(length=100*8)
a1<-1
a2<-100
lambda<-10e-7
for (i in 1:8){
  a1
  mod.2<-prs.fit(Sources19,Depth01,xk,lambda)
  Xp<-spl.X(xp,xk)
  YALL[a1:a2]<-Xp%*%coef(mod.2)
  XALL[a1:a2]<-xp
  IDALL[a1:a2]<-rep(lambda,100)
  a1<-a1+100
  a2<-a2+100
  lambda<-lambda*10
}


xyplot(YALL~XALL|factor(IDALL),type="l",col=1,xlab="Depth (scaled between 0 and 1)",
       ylab="Sources",a1=1,
       panel=function(x,y,subscripts,...){
         panel.lines(x,y,col=1,lwd=2)
         panel.points(Depth01,Sources19,col=1,cex=0.5)}
)

#######################################################################################################
## Ejemplo de uso de modelos GAM
## Tenemos dos estaciones: ¿siguen el mismo patrón o existend diferencias significativas entre ellas?
#######################################################################################################

S8 <- ISIT$Sources[ISIT$Station == 8]
D8 <- ISIT$SampleDepth[ISIT$Station == 8]
S13 <- ISIT$Sources[ISIT$Station == 13]
D13 <- ISIT$SampleDepth[ISIT$Station == 13]
So <- c(S8, S13); De <- c(D8, D13)  # Datos conjuntos de ambas estaciones
ID <- rep(c(8, 13), c(length(S8), length(S13)))  # Identificador de estacion
mi <- max(min(D8), min(D13))        
ma <- min(max(D8), max(D13))
I1 <- De > mi & De < ma        # Rango común a ambas estaciones
op <- par(mfrow = c(2, 2))
## Graficamos los datos de ambas estaciones en el rango común
plot(D8[I1], S8[I1], pch = 16, xlab = "Depth",
     ylab = "Sources", col = 1, main = "Station 8",
     xlim = c(500, 3000), ylim = c(0, 40))
plot(D13[I1], S13[I1], pch = 16, xlab = "Depth",
     ylab = "Sources", col = 1, main = "Station 13",
     xlim = c(500, 3000), ylim = c(0, 40))
M8 <- gam(S8[I1] ~ s(D8[I1],fx=F,k=-1,bs="cr"))
M13 <- gam(S13[I1] ~ s(D13[I1],fx=F,k=-1,bs="cr"))
plot(M8, se = TRUE)
plot(M13,se=TRUE)
par(op)

## Estimación de un modelo común para ambas estaciones
require(mgcv)

M3 <- gam(So ~ s(De,fx=F,k=-1,bs="cr"))
plot(M3, se = TRUE)
anova(M3)
gam.check(M3)

## Estimación de dos perfiles paralelos, uno para cada estación

fID <-factor(ID)  
M4 <- gam(So ~ s(De) + fID, subset = I1)  # Modelo común para ambas estaciones
summary(M4)  # Mismo patrón, con diferencia constante 12.296 entre ambas estaciones
anova(M4)

AIC(M3,M4)

par(mar=c(2,2,2,2))
vis.gam(M4,theta=120,color="heat")  # Permite ver simultáneamente los dos ajustes


## Validación del modelo
gam.check(M4)


## Modelo con interacción entre estación y profundidad
M5<-gam(So ~ s(De)+ s(De, by = as.numeric(ID == 13)),
        subset = I1)

## Aquí s(De) es el suavizador para la primera estación y s(De, by = ...) representa la diferencia de la segunda respecto a la primera.            
anova(M5)

plot(M5)

gam.check(M5)

## Comparación entre el modelo sin y con interacciones
anova(M4,M5, test="F")
AIC(M4)
AIC(M5)

## El modelo anterior puede parametrizarse también mediante un suavizador distinto para cada estación:

M6 <- gam(So~s(De,by = as.numeric(ID== 8))+
            s(De,by = as.numeric(ID== 13))-1, subset=I1)


#O, de forma equivalente:
M6 <- gam(So~s(De,by = ID) + factor(ID),subset=I1)