library(tidyverse)
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
## Registered S3 method overwritten by 'rvest':
##   method            from
##   read_xml.response xml2
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1       ✔ purrr   0.3.2  
## ✔ tibble  2.1.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.3       ✔ stringr 1.4.0  
## ✔ readr   1.3.1       ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Locate the first sample of every (Stage, Nodes) combination.
#
# d: data frame with the columns Stage, Nodes, Time and y.
#
# Returns an ungrouped tibble with one row per (Stage, Nodes) pair, holding
# Start (the smallest Time of the pair) and y (the smallest y of the pair),
# sorted by Start.
getStarts <- function(d) {
  d %>%
    select(Stage, Nodes, Time, y) %>%
    group_by(Stage, Nodes) %>%
    # NOTE(review): y is the minimum over the whole group, not necessarily
    # the value observed at Start -- confirm this is the intended marker
    # height.
    summarise(Start = min(Time), y = min(y)) %>%
    # summarise() only peels off the last grouping level (Nodes); drop the
    # remaining Stage grouping so callers receive a plain tibble.
    ungroup() %>%
    arrange(Start)
}
# Base directory under which the monitor log is looked up.
RESEARCH_HOME <- "/home/and/Documents/PhD/Research"

# Read the raw log and keep only the records tagged |SCALE|.
lines <- readLines(
  file.path(
    RESEARCH_HOME,
    "Scripts/R/Benchmarks/MultiAndSingleNode/R12/aws/monitor.txt"
  )
)
lines <- lines[grepl("|SCALE|", lines, fixed = TRUE)]

# Parse the pipe-delimited records into typed columns and average the
# numeric measurements per (ID, Time, Nodes, Stage).
# tibble(value = ...) is used instead of as_tibble() on a bare vector, which
# is deprecated; the resulting single column is still called `value`.
monitor <- tibble(value = lines) %>%
  separate(
    value,
    into = c("Timestamp", "Scale", "Time", "ID", "Nodes", "Stage", "RDDs",
             "Task", "Dura", "Load"),
    sep = "\\|"
  ) %>%
  # Keep only the third underscore-separated piece of the ID field.
  separate(ID, into = c(NA, NA, "ID"), sep = "_") %>%
  select(ID, Time, Nodes, Stage, RDDs, Task, Load) %>%
  mutate(
    Time = as.numeric(Time),
    RDDs = as.numeric(RDDs),
    Tasks = as.numeric(Task),
    Load = as.numeric(Load)
  ) %>%
  group_by(ID, Time, Nodes, Stage) %>%
  # The result stays grouped by ID, Time and Nodes (summarise() drops only
  # the last grouping level).
  summarise(RDDs = mean(RDDs), Tasks = mean(Tasks), Load = mean(Load))
head(monitor)
# Load samples of the three selected runs, ordered by time; Load is exposed
# as `y` for plotting.
d <- monitor %>%
  filter(ID %in% c("0028", "0029", "0030")) %>%
  ungroup() %>%
  transmute(Time, y = Load, Nodes, Stage) %>%
  arrange(Time)

# First sample of each stage, with an extra tooltip line for the two stages
# that mark the start and the end of the maximal disk search.
starts <- getStarts(d) %>%
  mutate(
    Label = case_when(
      Stage == "count at MF.scala:194" ~ "<br>Maximal disk find start",
      Stage == "rdd at MF.scala:223" ~ "<br>Maximal disk find end",
      TRUE ~ ""
    )
  )

# Load over time as a line, with one marker per stage start.
# `text` is not a ggplot2 aesthetic, so ggplot2 warns
# "Ignoring unknown aesthetics: text"; it is passed on to ggplotly() below,
# which is told to use it as the tooltip.
p <- ggplot(
  d,
  aes(x = Time, y = y, group = 1, color = Nodes, linetype = Nodes)
) +
  geom_line() +
  geom_point(
    data = starts,
    aes(
      x = Start,
      y = y,
      group = 1,
      color = Nodes,
      text = paste0(Stage, "<br>Start: ", Start, ", Load: ", y, "<br>", Label)
    )
  ) +
  labs(x = "Time(s)", y = "Load (Mb)")
ggplotly(p, tooltip = "text")