##########################
#Install and load packages
##########################

#####
#RENV
#####

##Download renv package if necessary (our version = 0.17.3)
#Get currently installed packages
pkg <- installed.packages()[,1] 

#Check if renv is installed, else download it
if (!("renv" %in% pkg)) {
  if (!requireNamespace("remotes", quietly = TRUE)) {
    install.packages("remotes")
  }
  remotes::install_version("renv", version = "0.17.3")
  print('Installed package: renv (v0.17.3)')
} else {
  print('renv is already installed')
}
## [1] "renv is already installed"
#load renv package
library(renv)
#Set directory to location renv lock file
setwd("/home/genomics/rolbrechts/00_Data/01_First_analysis/10_Phylogenetic_tree/R_Markdown/")
renv::activate()
#Download packages from lockfile (answer yes in console)
renv::restore(packages = c('ggplot','ape','BAT'))
## * The library is already synchronized with the lockfile.
renv::restore(packages = 'ggtree')
## Installing ggtree [3.6.2] ...
##  OK [linked cache in 0.26 milliseconds]
##############
#Load packages
##############
library(BAT)
library(ape)
library(ggplot2)
library(ggtree)
##########
#Read data
##########

#Read in distance matrix file
dist_matrix <- as.matrix(read.table('InversedDistances_Jaccard.dist', row.names = 1))
##################
#Neighbour joining
##################

#Create NJ tree from distance matrix
NJ_tree <- nj(dist_matrix)

#Root tree by selecting an appropriate outgroup 
#Put samples from desired outgroup in vector
#E.g. with grep
outgroup <- grep('Psilanthus_wild_plot_BR', row.names(dist_matrix), value = TRUE)
#E.g. manually
#outgroup <- c('outgroup_sample1','outgroup_sample2', ...)

#Root the tree
NJ_tree_rooted <- root(NJ_tree, outgroup = outgroup, resolve.root = TRUE)

#Possible error: the specified outgroup is not monophyletic
# => Trim some of the outgroup samples until they are all from the same clade 
# => root tree again

#Check if negative edge lengths are present in the tree:
negative_branches <- sum(NJ_tree_rooted$edge.length < 0)

#If present: convert negatives to 0 and shorten only the two branches immediately below it 
#by the same absolute amount with the BAT package
if (negative_branches != 0) {
  NJ_tree_rooted <- tree.zero(NJ_tree_rooted)
}

##############
#Plotting tree
##############

#Add population info to the NJ tree using an annotation file
annot <- read.table("Annotations.txt",header = T, sep = "\t")

#Merge the annotation file with the labels of the tree by the Sample_ID
merged <- merge(annot, data.frame(Sample_ID = NJ_tree_rooted$tip.label),
                by = 'Sample_ID')

#create empty list to store the group of each sample
groups <- list()

#create a key for each group in the list and add the respective samples as values
#(merged$Collection contains the population IDs)
for (group in unique(merged$Collection)) {
    groups[[group]] <- merged$Sample_ID[merged$Collection == group]
}

#Add group info to the NJ tree object
NJ_tree_rooted <- groupOTU(NJ_tree_rooted, groups)

#Set plotting colors for each group
colors <- setNames(c("orange","#FF0099","#0066FF","#00FFFF","darkgreen"),
                   levels(factor(names(groups))))

#Create ggtree plot
#Set layout to your preferred configuration, we chose 'circular'
phyl_plot_NJ <- ggtree(NJ_tree_rooted, aes(color=group), layout = 'circular') +
    #Color dendogram and samples names, add title to legend
    scale_color_manual(values=colors, name = 'Group') + 
    #Add sample labels, placed around the circle (angle)
    geom_tiplab(size=1, aes(angle=angle)) +
    ggtitle("Phylogenetic tree using Neighbour Joining") +
    theme(legend.position="right",
            plot.title = element_text(hjust = 0.5)) 

#Save to PNG
ggsave("phyl_tree_NJ.png", phyl_plot_NJ, width = 12, height = 10, dpi = 1200)

##############
#Control tree
##############

###Check how well the tree represents the distances from the distance matrix
##Extract original distances out of matrix 
#Duplicate matrix
x <- dist_matrix
#Remove the diagonal and upper half of the matrix (don't need duplicates and diagonal of zeros)
x[upper.tri(x, diag = TRUE)] <- NA
x <- x[!is.na(x)]

#Extract distances from created tree (cophenetic computes distances between the tips of the trees)
y <- as.vector(as.dist(cophenetic(NJ_tree_rooted)))

###Plot the two distances from the matrix and tree in function of each other
##Create prediction interval for the plot (confidence level can be adjusted)
confidence_lvl <- 0.95
model <- lm(y~x)

#Create new x-values for prediction
new_x <- seq(min(x), max(x), length.out = 100)

#Predict upper and lower bounds of y-values based on the new x-values, with certain confidence level
pred <- predict(model, newdata = data.frame(x = new_x), 
                interval = "predict", level = confidence_lvl)

##Create the plot and save as png
png("NJ_control.png", width = 24, height = 20, units = 'cm', res = 1200)

control_plot <- plot(x, y, xlab="Original pairwise distances", ylab="Tree pairwise distances",
    main="Neighbour Joining", pch=20, col='black', cex=3)
#Add a trendline
abline(model, col="red", lwd = 2.5)
#Add preciction interval (lower bound)
abline(lm(pred[,2]~new_x), col='blue', lwd = 2.5)
#Add preciction interval (upper bound)
abline(lm(pred[,3]~new_x), col='blue', lwd = 2.5)
#Add the coefficient of determination to the plot title
mtext(paste0("R²: ",round(cor(x,y)^2,3)))

dev.off()
## png 
##   2