## creating taxonomy file for Somalia weekly data from cholera platform
library(pacman)
p_load(readxl,readr,dplyr,here,tidyr,aweek,stringr)
here()

## note that we are temporarily labeling Weeks 1-22 cumulative as W22 in 2017 document
dat_2017 = read_xlsx(here("2017 Cholera Data Set.xlsx"),range="A4:BG18",
                     col_names = c("country",
                                   paste0("W",rep(sprintf("%.2d",22:49),each=2),"-",
                                          rep(c("C","D"),time=length(22:49))),"Cum-C","Cum-D"))  %>% 
  filter(country=="Somalia")

dat_2018 = read_xls(here("2018 Cholera Data set.xls"),range="A3:DC13",
                    col_names = c("country",
                                  paste0("W",rep(sprintf("%.2d",1:52),each=2),"-",
                                         rep(c("C","D"),time=52)),"Cum-C","Cum-D")) %>% 
  filter(country=="Somalia")

dat_2019 = read_xls(here("2019 Cholera Data set.xls"),
                    range="A3:AK14",
                    col_names = c("country",
                                   paste0("W",rep(sprintf("%.2d",1:17),each=2),"-",
                                          rep(c("C","D"),time=17)),"Cum-C","Cum-D")) %>% 
  filter(country=="Somalia")


cases = bind_rows(
  dat_2019 %>% select(ends_with("-C")) %>% select_all(~gsub("-C","",.)) %>% gather(week,sCh) %>% mutate(week = paste0(2019,"-",week)),
  dat_2018 %>% select(ends_with("-C")) %>% select_all(~gsub("-C","",.)) %>% gather(week,sCh) %>% mutate(week = paste0(2018,"-",week)),
  dat_2017 %>% select(ends_with("-C")) %>% select_all(~gsub("-C","",.)) %>% gather(week,sCh) %>% mutate(week = paste0(2017,"-",week)))

deaths = bind_rows(
  dat_2019 %>% select(ends_with("-D")) %>% select_all(~gsub("-D","",.)) %>% gather(week,deaths) %>% mutate(week = paste0(2019,"-",week)),
  dat_2018 %>% select(ends_with("-D")) %>% select_all(~gsub("-D","",.)) %>% gather(week,deaths) %>% mutate(week = paste0(2018,"-",week)),
  dat_2017 %>% select(ends_with("-D")) %>% select_all(~gsub("-D","",.)) %>% gather(week,deaths) %>% mutate(week = paste0(2017,"-",week)))

rc <- full_join(cases,deaths) %>% 
  mutate(who_region = "EMR",ISO_A1 = "SOM",TL = week2date(week,week_start=1),TR = TL + 6,Primary=TRUE)

## now deal with the cumulatives 
## assuming they are correct when the sums of the cases does not equal the cumulative number listed...
rc <- rc %>%  mutate(TL = if_else(week == "2019-Cum",week2date("2019-W01"),
                            if_else(week == "2018-Cum",week2date("2018-W01"),
                            if_else(week == "2017-Cum",week2date("2017-W01"),
                            TL
                           ))),
               TR = if_else(week == "2019-Cum",week2date("2019-W16")+6,
                            if_else(week == "2018-Cum",week2date("2018-W52")+6,
                                    if_else(week == "2017-Cum",week2date("2017-W52")+6,
                                            TR
                                    )))
               )

## finally deal with the 2017 dates
rc <- rc %>% mutate(TL = if_else(week == "2017-W22",week2date("2017-W01"),TL))

## add phantom col
rc <- rc %>% mutate(Phantom = NA)

write_csv(rc,here("20375.csv"),na = "")



