
library(tidyverse)
library(reshape2)
library(tidyr)
library(MMWRweek)

# Convert the UNICEF weekly reports (2018 and 2019) ----
country_iso <- "NER"

# Read one year's wide UNICEF weekly report, reshape it to one row per
# location and week, write the result to "OC_<input file name>" and return it.
#
# country_iso: prefix of the input file name, e.g. "NER".
# report_year: four-digit year; selects "<country_iso>_UNICEF_<year>.csv"
#              and is used as the MMWR year of every week in the file.
# Returns a data frame with the columns Location, TL, TR, sCh, Primary,
# Phantom and Report; rows whose value is missing are dropped.
convert_unicef_weekly <- function(country_iso, report_year) {
  file_name <- paste(
    country_iso,
    paste0("UNICEF_", report_year, ".csv"),
    sep = "_"
  )
  raw <- read.csv(
    file_name,
    fileEncoding = "UTF-8-BOM",
    na.strings = c("", "NA")
  )
  # drop = FALSE keeps a data frame even if only one column is left.
  raw <- raw[, !colnames(raw) %in% c("Row.Labels"), drop = FALSE]

  converted <- raw %>%
    # Collapse the geographic columns into one "::"-separated key,
    # skipping levels that are missing.
    unite(
      Location,
      who_region,
      Country,
      ISO_A1,
      ISO_A2,
      ISO_A3,
      na.rm = TRUE,
      sep = "::"
    ) %>%
    # Wide to long: every remaining column becomes (variable, value) rows.
    melt(id.vars = "Location") %>%
    arrange(Location) %>%
    mutate(
      year = report_year,
      # The week number is the numeric part of the original column name.
      epiweek = parse_number(
        as.character(variable),
        locale = locale(grouping_mark = ". ", decimal_mark = ",")
      ),
      # MMWRday = 2 is Monday in MMWRweek's Sunday-first day numbering.
      TL = MMWRweek2Date(
        MMWRyear = year,
        MMWRweek = epiweek,
        MMWRday = 2
      ),
      # Six days after TL, i.e. the last day of that Monday-based week.
      TR = TL + 6,
      Primary = TRUE,
      Phantom = FALSE,
      Report = paste0(report_year, "_weekly_csv_report")
    ) %>%
    rename(sCh = value) %>%
    select(Location, TL, TR, sCh, Primary, Phantom, Report) %>%
    subset(!is.na(sCh))

  write.csv(converted, paste0("OC_", file_name), na = "", row.names = FALSE)
  converted
}

new_data_2018 <- convert_unicef_weekly(country_iso, 2018)
new_data_2019 <- convert_unicef_weekly(country_iso, 2019)

  
# Stack both converted years into a single table.
new_data <- rbind(new_data_2018, new_data_2019)

# Verify that every location of the new data is already a region in the
# database export; evaluates to TRUE only when none is missing.
all_loc <- read.csv("all_loctions_in_db_2021-04-15.csv")
all(is.element(new_data$Location, all_loc$region))

# New locations ----
# Locations found in the new data but not among the database regions, each
# paired with the database region(s) that look most similar to it.
#
# Columns:
#   new_location   - the location string missing from the database
#   match_location - every region tied for the highest Levenshtein
#                    similarity, joined with "|"
#
# The match column is built directly from the list of missing locations, so
# the block also works when there are none: assigning a scalar NA to a
# zero-row data frame (as was done before) stops with
# "replacement has 1 row, data has 0".
new_location <- local({
  unmatched <- unique(
    new_data$Location[!new_data$Location %in% all_loc$region]
  )

  best_match <- vapply(
    unmatched,
    function(candidate) {
      # Score the candidate against every region once and reuse the scores.
      similarity <- RecordLinkage::levenshteinSim(candidate, all_loc$region)
      paste(all_loc$region[similarity == max(similarity)], collapse = "|")
    },
    character(1),
    USE.NAMES = FALSE
  )

  result <- data.frame(new_location = unmatched)
  result$match_location <- best_match
  # Returned row-wise, as before, so later per-row dplyr steps on this
  # table keep behaving the same.
  rowwise(result)
})