# Mixed Effects Models and Extensions in Ecology with R (2009) # Zuur, Ieno, Walker, Saveliev and Smith. Springer # This file was produced by Alain Zuur (highstat@highstat.com) # www.highstat.com # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. ## Datos de bioluminiscencia (source) según estación de muestreo require(RCurl) datos=getURL("https://dl.dropboxusercontent.com/u/7610774/Datos/zuur/ISIT.txt") ISIT=read.csv(textConnection(datos),sep="\t") head(ISIT) ISIT$fStation=factor(ISIT$Station) library(lattice) xyplot(Sources~SampleDepth|fStation, data=ISIT, type=c("p")) ## Seleccionamos una estación y le ajustamos un modelo gam library(mgcv) op <- par(mfrow = c(2, 2), mar = c(5, 4, 1, 2)) Sources16 <- ISIT$Sources[ISIT$Station == 16] Depth16 <- ISIT$SampleDepth[ISIT$Station == 16] plot(Depth16, Sources16, type = "p") ## La librería mgcv utiliza splines s(x) como funciones para el modelo aditivo ## fx=F, k=-1 indica utilizar validación cruzada para estimar la cantidad de suavizamiento ## bs="cr" indica la utilización de splines cúbicos M3 <- gam(Sources16 ~ s(Depth16,fx=F,k=-1,bs="cr")) plot(M3, se = TRUE) M3pred <- predict(M3, se = TRUE, type = "response") plot(Depth16, Sources16, type = "p") I1 <- order(Depth16) lines(Depth16[I1], M3pred$fit[I1], lty=1) lines(Depth16[I1], M3pred$fit[I1]+2*M3pred$se[I1],lty=2) lines(Depth16[I1], M3pred$fit[I1]-2*M3pred$se[I1],lty=2) #Residuos E2 <- resid(M3) F2 <- fitted(M3) par(mfrow=c(2,2), mar=c(5,4,2,2)) plot(x = F2, y=E2, xlab="Fitted values", ylab="Residuals") plot(x = Depth16, y=E2, xlab="Depth", ylab="Residuals") hist(E2,main="", xlab="Residuals") summary(M3) ########################################################################################## ## ¿Como funciona el ajuste mediante splines? ########################################################################################## ## Algunos splines elementales (piezas de puzzle para ajustarse a funciones arbitrarias) ## ------------------------------------------------------------------------------------- library(lattice) x<-seq(0,1,length=25) co<-matrix(nrow=6,ncol=4) co[1:6,1:4]<-rnorm(mean=0,sd=5,24) f1<-co[1,1]+co[1,2]*x+co[1,3]*x^2+co[1,4]*x^3 f2<-co[2,1]+co[2,2]*x+co[2,3]*x^2+co[2,4]*x^3 f3<-co[3,1]+co[3,2]*x+co[3,3]*x^2+co[3,4]*x^3 f4<-co[4,1]+co[4,2]*x+co[4,3]*x^2+co[4,4]*x^3 f5<-co[5,1]+co[5,2]*x+co[5,3]*x^2+co[5,4]*x^3 f6<-co[6,1]+co[6,2]*x+co[6,3]*x^2+co[6,4]*x^3 xall<-rep(x,6) ID<-rep(c(1,2,3,4,5,6),each=25) f<-c(f1,f2,f3,f4,f5,f6) xyplot(f~xall|factor(ID),col=1,type="l",xlab="X values",ylab="Function f") ## Encajando todas las piezas del puzzle en un caso particular ## ------------------------------------------------------------- Sources<-ISIT$Sources[ISIT$Station==16] Depth<-ISIT$SampleDepth[ISIT$Station==16] Range<-max(Depth)-min(Depth) Bins<-Range/4 B1<-min(Depth)+Bins B2<-min(Depth)+2*Bins B3<-min(Depth)+3*Bins plot(Depth,Sources) abline(v=B1,lty=2) abline(v=B2,lty=2) abline(v=B3,lty=2) S1<-Sources[Depth<=B1] D1<-Depth[Depth B1 & Depth<=B2] D1<-Depth[Depth > B1 & Depth<=B2] tmp<-lm(S1~D1+I(D1^2) +I(D1^3)) F1<-fitted(tmp) I1<-order(D1) lines(D1[I1],F1[I1]) S1<-Sources[Depth > B2 & Depth<=B3] D1<-Depth[Depth > B2 & Depth<=B3] tmp<-lm(S1~D1+I(D1^2) +I(D1^3)) F1<-fitted(tmp) I1<-order(D1) lines(D1[I1],F1[I1]) S1<-Sources[Depth > B3 ] D1<-Depth[Depth > B3 ] tmp<-lm(S1~D1+I(D1^2) +I(D1^3)) F1<-fitted(tmp) I1<-order(D1) lines(D1[I1],F1[I1]) ## Efecto del número de nodos ## --------------------------- Sources19<-ISIT$Sources[ISIT$Station==19] Depth19<-ISIT$SampleDepth[ISIT$Station==19] Depth01<-Depth19 Depth01<-Depth01-min(Depth01) Depth01<-Depth01/max(Depth01) I<-order(Depth01) Depth01<-Depth01[I] Sources19<-Sources19[I] rk<-function(x,z){ ((z-0.5)^2-1/12)*((x-0.5)^2-1/12)/4 -((abs(x-z)-0.5)^4 -0.5*(abs(x-z)-0.5)^2 +7/240)/24 } spl.X<-function(x,xk){ q<-length(xk)+2 n<-length(x) X<-matrix(1,n,q) X[,2]<-x X[,3:q]<-outer(x,xk,FUN=rk) X } YALL<-vector(length=100*8) XALL<-vector(length=100*8) IDALL<-vector(length=100*8) a1<-1 a2<-100 XkALL<-0 IDxk<-0 for (knots in 1: 8){ xk<-1:knots/(knots+1) XkALL<-c(XkALL,xk) IDxk<-c(IDxk,rep(knots,length(xk))) X<-spl.X(Depth01,xk) tmp1<-lm(Sources19~X-1) xp<-1:100/100 Xp<-spl.X(xp,xk) plot(Depth01,Sources19) lines(xp,Xp%*%coef(tmp1)) YALL[a1:a2]<-Xp%*%coef(tmp1) XALL[a1:a2]<-xp IDALL[a1:a2]<-knots a1<-a1+100 a2<-a2+100 n<-length(xk) for (i in 1:n){ abline(v=xk[i],lty=2) } } XkALL<-XkALL[-1] IDxk<-IDxk[-1] require(lattice) IDALL2<-IDALL+2 xyplot(YALL~XALL|factor(IDALL2),type="l",col=1,xlab="Depth (scaled between 0 and 1)", ylab="Sources",a1=1, panel=function(x,y,subscripts,...){ panel.lines(x,y,col=1,lwd=2) panel.points(Depth01,Sources19,col=1,cex=0.5)# a<-IDALL2[subscripts] if (a[1] == 3){for (i in 1:1) {panel.abline(v=XkALL[i],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} } if (a[1] == 4){for (i in 1:2) {panel.abline(v=XkALL[i+1],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} } if (a[1] == 5){for (i in 1:3) {panel.abline(v=XkALL[i+1+2],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} } if (a[1] == 6){for (i in 1:4) {panel.abline(v=XkALL[i+1+2+3],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} } if (a[1] == 7){for (i in 1:5) {panel.abline(v=XkALL[i+1+2+3+4],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} } if (a[1] == 8){for (i in 1:6) {panel.abline(v=XkALL[i+1+2+3+4+5],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} } if (a[1] == 9){for (i in 1:7) {panel.abline(v=XkALL[i+1+2+3+4+5+6],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} } if (a[1] == 10){for (i in 1:8) {panel.abline(v=XkALL[i+1+2+3+4+5+6+7],lty=2);panel.abline(v=0,lty=2);panel.abline(v=1,lty=2)} } } ) ## Selección de lambda (nivel óptimo de suavizamiento) ## --------------------------------------------------- #Figure 3.10 #Show penalized spline in action spl.S<-function(xk){ q<-length(xk)+2 S<-matrix(0,q,q) S[3:q,3:q]<-outer(xk,xk,FUN=rk) S } mat.sqrt<-function(S){ d<-eigen(S,symmetric=TRUE) d$values[d$values<0]<-0 rS<-d$vectors%*%diag(d$values^0.5)%*%t(d$vectors) rS } prs.fit<-function(y,x,xk,lambda){ print(lambda) q<-length(xk)+2 n<-length(x) Xa<-rbind(spl.X(x,xk),mat.sqrt(spl.S(xk))*sqrt(lambda)) y[(n+1):(n+q)]<-0 lm(y~Xa-1) } xk<-1:7/8 xp<-1:100/100 lambda<-1e-6 #Graph showing the effect of lambda YALL<-vector(length=100*8) XALL<-vector(length=100*8) IDALL<-vector(length=100*8) a1<-1 a2<-100 lambda<-10e-7 for (i in 1:8){ a1 mod.2<-prs.fit(Sources19,Depth01,xk,lambda) Xp<-spl.X(xp,xk) YALL[a1:a2]<-Xp%*%coef(mod.2) XALL[a1:a2]<-xp IDALL[a1:a2]<-rep(lambda,100) a1<-a1+100 a2<-a2+100 lambda<-lambda*10 } xyplot(YALL~XALL|factor(IDALL),type="l",col=1,xlab="Depth (scaled between 0 and 1)", ylab="Sources",a1=1, panel=function(x,y,subscripts,...){ panel.lines(x,y,col=1,lwd=2) panel.points(Depth01,Sources19,col=1,cex=0.5)} ) ####################################################################################################### ## Ejemplo de uso de modelos GAM ## Tenemos dos estaciones: ¿siguen el mismo patrón o existend diferencias significativas entre ellas? ####################################################################################################### S8 <- ISIT$Sources[ISIT$Station == 8] D8 <- ISIT$SampleDepth[ISIT$Station == 8] S13 <- ISIT$Sources[ISIT$Station == 13] D13 <- ISIT$SampleDepth[ISIT$Station == 13] So <- c(S8, S13); De <- c(D8, D13) # Datos conjuntos de ambas estaciones ID <- rep(c(8, 13), c(length(S8), length(S13))) # Identificador de estacion mi <- max(min(D8), min(D13)) ma <- min(max(D8), max(D13)) I1 <- De > mi & De < ma # Rango común a ambas estaciones op <- par(mfrow = c(2, 2)) ## Graficamos los datos de ambas estaciones en el rango común plot(D8[I1], S8[I1], pch = 16, xlab = "Depth", ylab = "Sources", col = 1, main = "Station 8", xlim = c(500, 3000), ylim = c(0, 40)) plot(D13[I1], S13[I1], pch = 16, xlab = "Depth", ylab = "Sources", col = 1, main = "Station 13", xlim = c(500, 3000), ylim = c(0, 40)) M8 <- gam(S8[I1] ~ s(D8[I1],fx=F,k=-1,bs="cr")) M13 <- gam(S13[I1] ~ s(D13[I1],fx=F,k=-1,bs="cr")) plot(M8, se = TRUE) plot(M13,se=TRUE) par(op) ## Estimación de un modelo común para ambas estaciones require(mgcv) M3 <- gam(So ~ s(De,fx=F,k=-1,bs="cr")) plot(M3, se = TRUE) anova(M3) gam.check(M3) ## Estimación de dos perfiles paralelos, uno para cada estación fID <-factor(ID) M4 <- gam(So ~ s(De) + fID, subset = I1) # Modelo común para ambas estaciones summary(M4) # Mismo patrón, con diferencia constante 12.296 entre ambas estaciones anova(M4) AIC(M3,M4) par(mar=c(2,2,2,2)) vis.gam(M4,theta=120,color="heat") # Permite ver simultáneamente los dos ajustes ## Validación del modelo gam.check(M4) ## Modelo con interacción entre estación y profundidad M5<-gam(So ~ s(De)+ s(De, by = as.numeric(ID == 13)), subset = I1) ## Aquí s(De) es el suavizador para la primera estación y s(De, by = ...) representa la diferencia de la segunda respecto a la primera. anova(M5) plot(M5) gam.check(M5) ## Comparación entre el modelo sin y con interacciones anova(M4,M5, test="F") AIC(M4) AIC(M5) ## El modelo anterior puede parametrizarse también mediante un suavizador distinto para cada estación: M6 <- gam(So~s(De,by = as.numeric(ID== 8))+ s(De,by = as.numeric(ID== 13))-1, subset=I1) #O, de forma equivalente: M6 <- gam(So~s(De,by = ID) + factor(ID),subset=I1)