##########################
#Install and load packages
##########################
#####
#RENV
#####
##Download renv package if necessary (our version = 0.17.3)
#Get currently installed packages
pkg <- installed.packages()[,1]
#Check if renv is installed, else download it
if (!("renv" %in% pkg)) {
if (!requireNamespace("remotes", quietly = TRUE)) {
install.packages("remotes")
}
remotes::install_version("renv", version = "0.17.3")
print('Installed package: renv (v0.17.3)')
} else {
print('renv is already installed')
}
## [1] "renv is already installed"
#load renv package
library(renv)
#Set directory to location renv lock file
setwd("/home/genomics/rolbrechts/00_Data/01_First_analysis/10_Phylogenetic_tree/R_Markdown/")
renv::activate()
#Download packages from lockfile (answer yes in console)
renv::restore(packages = c('ggplot','ape','BAT'))
## * The library is already synchronized with the lockfile.
renv::restore(packages = 'ggtree')
## Installing ggtree [3.6.2] ...
## OK [linked cache in 0.26 milliseconds]
##############
#Load packages
##############
library(BAT)
library(ape)
library(ggplot2)
library(ggtree)
##########
#Read data
##########
#Read in distance matrix file
dist_matrix <- as.matrix(read.table('InversedDistances_Jaccard.dist', row.names = 1))
##################
#Neighbour joining
##################
#Create NJ tree from distance matrix
NJ_tree <- nj(dist_matrix)
#Root tree by selecting an appropriate outgroup
#Put samples from desired outgroup in vector
#E.g. with grep
outgroup <- grep('Psilanthus_wild_plot_BR', row.names(dist_matrix), value = TRUE)
#E.g. manually
#outgroup <- c('outgroup_sample1','outgroup_sample2', ...)
#Root the tree
NJ_tree_rooted <- root(NJ_tree, outgroup = outgroup, resolve.root = TRUE)
#Possible error: the specified outgroup is not monophyletic
# => Trim some of the outgroup samples until they are all from the same clade
# => root tree again
#Check if negative edge lengths are present in the tree:
negative_branches <- sum(NJ_tree_rooted$edge.length < 0)
#If present: convert negatives to 0 and shorten only the two branches immediately below it
#by the same absolute amount with the BAT package
if (negative_branches != 0) {
NJ_tree_rooted <- tree.zero(NJ_tree_rooted)
}
##############
#Plotting tree
##############
#Add population info to the NJ tree using an annotation file
annot <- read.table("Annotations.txt",header = T, sep = "\t")
#Merge the annotation file with the labels of the tree by the Sample_ID
merged <- merge(annot, data.frame(Sample_ID = NJ_tree_rooted$tip.label),
by = 'Sample_ID')
#create empty list to store the group of each sample
groups <- list()
#create a key for each group in the list and add the respective samples as values
#(merged$Collection contains the population IDs)
for (group in unique(merged$Collection)) {
groups[[group]] <- merged$Sample_ID[merged$Collection == group]
}
#Add group info to the NJ tree object
NJ_tree_rooted <- groupOTU(NJ_tree_rooted, groups)
#Set plotting colors for each group
colors <- setNames(c("orange","#FF0099","#0066FF","#00FFFF","darkgreen"),
levels(factor(names(groups))))
#Create ggtree plot
#Set layout to your preferred configuration, we chose 'circular'
phyl_plot_NJ <- ggtree(NJ_tree_rooted, aes(color=group), layout = 'circular') +
#Color dendogram and samples names, add title to legend
scale_color_manual(values=colors, name = 'Group') +
#Add sample labels, placed around the circle (angle)
geom_tiplab(size=1, aes(angle=angle)) +
ggtitle("Phylogenetic tree using Neighbour Joining") +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5))
#Save to PNG
ggsave("phyl_tree_NJ.png", phyl_plot_NJ, width = 12, height = 10, dpi = 1200)
##############
#Control tree
##############
###Check how well the tree represents the distances from the distance matrix
##Extract original distances out of matrix
#Duplicate matrix
x <- dist_matrix
#Remove the diagonal and upper half of the matrix (don't need duplicates and diagonal of zeros)
x[upper.tri(x, diag = TRUE)] <- NA
x <- x[!is.na(x)]
#Extract distances from created tree (cophenetic computes distances between the tips of the trees)
y <- as.vector(as.dist(cophenetic(NJ_tree_rooted)))
###Plot the two distances from the matrix and tree in function of each other
##Create prediction interval for the plot (confidence level can be adjusted)
confidence_lvl <- 0.95
model <- lm(y~x)
#Create new x-values for prediction
new_x <- seq(min(x), max(x), length.out = 100)
#Predict upper and lower bounds of y-values based on the new x-values, with certain confidence level
pred <- predict(model, newdata = data.frame(x = new_x),
interval = "predict", level = confidence_lvl)
##Create the plot and save as png
png("NJ_control.png", width = 24, height = 20, units = 'cm', res = 1200)
control_plot <- plot(x, y, xlab="Original pairwise distances", ylab="Tree pairwise distances",
main="Neighbour Joining", pch=20, col='black', cex=3)
#Add a trendline
abline(model, col="red", lwd = 2.5)
#Add preciction interval (lower bound)
abline(lm(pred[,2]~new_x), col='blue', lwd = 2.5)
#Add preciction interval (upper bound)
abline(lm(pred[,3]~new_x), col='blue', lwd = 2.5)
#Add the coefficient of determination to the plot title
mtext(paste0("R²: ",round(cor(x,y)^2,3)))
dev.off()
## png
## 2