library(tidyverse)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
## Registered S3 method overwritten by 'rvest':
##   method            from
##   read_xml.response xml2
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1       ✔ purrr   0.3.2  
## ✔ tibble  2.1.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.3       ✔ stringr 1.4.0  
## ✔ readr   1.3.1       ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Location of the benchmark logs. The two path fragments and a file name are
# concatenated as-is with paste0(), so both fragments keep their trailing "/".
RESEARCH_HOME <- "/home/and/Documents/PhD/Research/"
FILES_PATH <- "Scripts/R/Benchmarks/MultiAndSingleNode/R12/dblab/"

# Names of the two log files read from that directory.
MONITOR_FILE <- "monitor.log"
NOHUP_FILE <- "nohup.out"

# Delimiter used to split the ID field of a log line into its pieces.
SEPARATOR_ID <- "-"

# Read the nohup log and keep only the lines that contain a "|" immediately
# followed by a digit 1-6 and a dot.
lines <- readLines(paste0(RESEARCH_HOME, FILES_PATH, NOHUP_FILE))
lines <- lines[grepl("\\|[1-6]\\.", lines)]

# Split every kept line on "|" into named columns.
# The tibble is built with tibble(value = ...) instead of as_tibble(lines):
# calling as_tibble() on a bare vector is deprecated and emitted a warning.
# The single input column is still called `value`, as separate() expects.
nohup <- tibble(value = lines) %>%
  separate(
    value,
    into = c("Timestamp", "ID", "Time", "Stage", "Duration", "Load", "TS"),
    sep = "\\|"
  ) %>%
  # The ID field has three SEPARATOR_ID-delimited pieces; only the third is
  # kept (NA in `into` discards a piece).
  separate(ID, into = c(NA, NA, "ID"), sep = SEPARATOR_ID) %>%
  mutate(
    ID = as.numeric(ID),
    Time = as.numeric(Time),
    Duration = as.numeric(Duration)
  )
# One row per logged stage. The stage label is prefixed with its TS value and
# the start is derived as the logged time minus the stage duration.
nohupStages <- nohup %>%
  mutate(
    Stage = paste0(TS, ".", str_trim(Stage)),
    Start = Time - Duration,
    End = Time
  ) %>%
  select(ID, Stage, Start, End, Duration)

# One row per (ID, TS): the earliest start and the latest end over all of the
# rows logged for that pair.
nohupTimeintervals <- nohup %>%
  select(ID, TS, Time, Duration) %>%
  mutate(Start = Time - Duration, End = Time) %>%
  group_by(ID, TS) %>%
  summarise(Start = min(Start), End = max(End)) %>%
  # summarise() only peels off the last grouping variable, so without this the
  # result would silently stay grouped by ID.
  ungroup()

# Row-wise predicate: is `Time` strictly inside the open interval (Start, End)?
#
# Meant to be called once per row (getStageStart does so through pmap_dfr), so
# Time, Start and End are each expected to be a single value; `&&` is a scalar
# operator. ID, Stage and TS are accepted only so that a row carrying those
# columns can be passed by name; they do not influence the result.
#
# Returns a one-row tibble with a single logical column named `value`.
checkF <- function(ID, Stage, TS, Time, Start, End) {
  check <- Start < Time && Time < End
  # Build the tibble explicitly: as_tibble() on a bare vector is deprecated,
  # and ending the function on an assignment returned the result invisibly.
  tibble(value = check)
}

# For every (Stage, Nodes) pair, find the first sample of `d` that falls inside
# one of the time windows in `intervals`.
#
# Arguments:
#   d          Data frame with columns ID, Time, y, Nodes and Stage.
#   intervals  Data frame with columns ID, TS, Start and End. Defaults to the
#              global `nohupTimeintervals`, which is what the function
#              previously read implicitly.
#
# Returns an ungrouped tibble with columns Stage (as "<TS>.<Stage>"), Nodes,
# Start (smallest matching Time) and y (smallest matching y), sorted by Start.
getStageStart <- function(d, intervals = nohupTimeintervals) {
  d %>%
    left_join(intervals, by = "ID") %>%
    ungroup() %>%
    # Same strict test that checkF applies (Start < Time < End), evaluated on
    # whole columns instead of building one tibble per row. Rows whose ID has
    # no interval carry NA bounds and are dropped by filter(), and a zero-row
    # input now simply yields a zero-row result.
    filter(Start < Time & Time < End) %>%
    mutate(Stage = paste0(TS, ".", Stage)) %>%
    select(Stage, Nodes, Time, y) %>%
    group_by(Stage, Nodes) %>%
    summarise(Start = min(Time), y = min(y)) %>%
    ungroup() %>%
    arrange(Start)
}
# Read the monitor log and keep only the lines that contain a "|SCALE|" field.
lines <- readLines(paste0(RESEARCH_HOME, FILES_PATH, MONITOR_FILE))
lines <- lines[grepl("\\|SCALE\\|", lines)]

# Split every kept line on "|", convert the numeric fields and average the
# measurements that share the same (ID, Time, Nodes, StageID, Stage) key.
# tibble(value = ...) replaces as_tibble(lines), which is deprecated for bare
# vectors; the input column is still called `value`.
monitor <- tibble(value = lines) %>%
  separate(
    value,
    into = c(
      "Timestamp", "Scale", "Time", "ID", "Nodes", "StageID",
      "Stage", "RDDs", "Task", "Dura", "Load"
    ),
    sep = "\\|"
  ) %>%
  # Only the third SEPARATOR_ID-delimited piece of the ID field is kept.
  separate(ID, into = c(NA, NA, "ID"), sep = SEPARATOR_ID) %>%
  select(ID, Time, Nodes, StageID, Stage, RDDs, Task, Load) %>%
  mutate(
    ID = as.numeric(ID),
    Time = as.numeric(Time),
    StageID = as.numeric(StageID),
    RDDs = as.numeric(RDDs),
    Tasks = as.numeric(Task),
    Load = as.numeric(Load)
  ) %>%
  group_by(ID, Time, Nodes, StageID, Stage) %>%
  summarise(RDDs = mean(RDDs), Tasks = mean(Tasks), Load = mean(Load)) %>%
  # Drop the leftover grouping before touching Nodes: it is one of the grouping
  # columns, and overwriting it on a grouped tibble leaves the stored group
  # keys out of step with the data.
  ungroup() %>%
  filter(StageID != -1) %>%
  # The Nodes value parsed from the log is replaced by one derived from ID.
  mutate(Nodes = (ID %% 3) + 1)
head(monitor)
# Data to plot: the rows for IDs 6, 7 and 8, with Load exposed as `y` and
# Nodes turned into a factor, ordered by Time.
d <- monitor %>%
  filter(ID %in% c(6, 7, 8)) %>%
  ungroup() %>%
  select(ID, Time, y = Load, Nodes, Stage) %>%
  mutate(Nodes = factor(Nodes)) %>%
  arrange(Time)

# First sample of each stage, used to mark the stage starts on the plot.
stageStarts <- getStageStart(d)
# Load over time, one colour and line type per Nodes level, with a point at
# the start of every stage.
p <- ggplot(
  d,
  aes(x = Time, y = y, group = 1, color = Nodes, linetype = Nodes)
) +
  geom_line() +
  geom_point(
    data = stageStarts,
    # `text` is not a ggplot2 aesthetic (hence the warning recorded below); it
    # is carried along so ggplotly() can use it as the tooltip.
    aes(
      x = Start, y = y, group = 1, color = Nodes,
      text = paste(Stage, "<br>Start:", Start, "<br>", y)
    )
  ) +
  labs(x = "Time(s)", y = "Load (MB)")
## Warning: Ignoring unknown aesthetics: text
# Render interactively, showing only the custom `text` in the tooltip.
ggplotly(p, tooltip = "text")