This is the entry point for the paper “Measuring the Landscape of Civil War.”

In this file, a raw csv file of the events dataset created for the Mau Mau rebellion is loaded and processed.

Load Library

rm(list=ls()); gc() #clear objects from memory
library(MeasuringLandscape)
library(tidyverse) #load independently just to make sure %>% gets imported
knitr::opts_knit$set(progress = TRUE, verbose = TRUE)
knitr::opts_chunk$set(fig.width = 12, fig.height = 8, warning = FALSE, message = FALSE, cache = TRUE)
options(width = 160)

Load Events Data

events <- MeasuringLandscape:::prep_events(fromscratch = F)
dim(events)
[1] 10469    25

Dates

Basic cleaning. Format is usually DD.MM.YYYY but sometimes multiple days are included by DD1/DD2/MM/YY. Sometimes year is YY or YYYY.

#p_load(date)
events <- events %>%
          dplyr::mutate(event_date_clean=event_date) %>%
          dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"[[:digit:]]+/", "")) %>%
          dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"[[:digit:]]+/", "")) %>% #strip off extra day at the front 01/02.12.1950
          dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"\\.", "/")) %>% #Convert periods to slashes
          dplyr::mutate(event_date_clean=trimws(event_date_clean)) %>% #trim whitespace
          dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"/52", "/1952")) %>% #convert 2 digit years to 4 digit years
          dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"/53", "/1953")) %>%
          dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"/54", "/1954")) %>%
          dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"/55", "/1955")) %>%
          dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"/56", "/1956")) %>%
          dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"/19524", "/1954")) %>% #clean typo
          dplyr::mutate(event_date_clean= lubridate::dmy(event_date_clean) ) #Feed to lubridate
 67 failed to parse.
  
events %>% filter(is.na(event_date_clean)) %>% dplyr::select(starts_with("event_date")) %>% distinct() %>% print(n=40) #visualize errors
events$event_date_clean_year <- lubridate::year(events$event_date_clean)
events$event_date_clean_year %>% janitor::tabyl() %>% round(3)
    .    n percent valid_percent
  953    1   0.000         0.000
 1855    1   0.000         0.000
 1952  157   0.015         0.020
 1953 3025   0.289         0.384
 1954 3165   0.302         0.402
 1955 1330   0.127         0.169
 1956  112   0.011         0.014
 1957   27   0.003         0.003
 1958    7   0.001         0.001
 1985    1   0.000         0.000
 1995   18   0.002         0.002
 2005    1   0.000         0.000
 2013    2   0.000         0.000
 2023    1   0.000         0.000
 2057   21   0.002         0.003
 2058    8   0.001         0.001
 2060    1   0.000         0.000
 2061    1   0.000         0.000
   NA 2590   0.247            NA

How often are event dates missing?

table(is.na(events$event_date))

FALSE  TRUE 
 7946  2523 

The documents also have dates, sometimes spanning a period of time. Can use that to nail down missing dates.

(events$document_date_type <- events$document_date %>% 
                             tolower() %>% 
                             mosaic::derivedFactor(
                                          "unknown" = T,
                                          "missing"     = stringr::str_detect(.,"obscured|missing|illegible|xx|Document missing"),
                                          "on the"      = stringr::str_detect(.,"on the"),
                                          "to"          = stringr::str_detect(.," to"),
                                          "for"         = stringr::str_detect(.,"For "),
                                          "week"        = stringr::str_detect(.,"week"),
                                          "week ending" = stringr::str_detect(.,"week ending"),
                                          "period"      = stringr::str_detect(.,"period"),
                                          "fortnight"   = stringr::str_detect(.,"fortnight"),
                                          "ending"      = stringr::str_detect(.,"ending"),
                                          .method = "last",
                                          .default = "unknown"
                            ) 
 ) %>% janitor::tabyl() 
           .    n     percent
                0 0.000000000
     unknown  877 0.083771134
     missing  324 0.030948515
      on the  101 0.009647531
          to 1640 0.156652975
         for    0 0.000000000
        week  130 0.012417614
 week ending    0 0.000000000
      period  226 0.021587544
   fortnight  562 0.053682300
      ending 6609 0.631292387
events$document_date_clean <- events$document_date %>% tolower() %>% 
                             stringr::str_replace_all("Fortnight Ended |period|week ending|for |the |fortnight |ending |week |From |on ","") %>%
                             stringr::str_replace_all("[Digits]*th|[Digits]*st|[Digits]*rd|[Digits]*nd","")
events <- events %>% 
         dplyr::select(-one_of("document_date_1","document_date_2")) %>%  #separate will continue to add columns every time its run
                              tidyr ::separate(col=document_date_clean,
                                        into=c("document_date_1","document_date_2"),
                                        sep = " to|to |To | - ", remove=F, extra="drop", fill="right")
Unknown columns: `document_date_1`, `document_date_2`
events$document_date_clean_1 <- events$document_date_1 %>% 
                                 stringr::str_replace_all("[[:digit:]]+/", "")   %>% #strip off extra day at the front 01/02.12.1950
                                 stringr::str_replace_all("\\.", "/")             %>% #Convert periods to slashes
                                 trimws() %>%                            
                                 lubridate::dmy()
 2356 failed to parse.
events$document_date_clean_2 <- events$document_date_2 %>% 
                                 stringr::str_replace_all("[[:digit:]]+/", "")   %>% #strip off extra day at the front 01/02.12.1950
                                 stringr::str_replace_all("\\.", "/")             %>% #Convert periods to slashes
                                 trimws() %>%                            
                                 lubridate::dmy()    
 257 failed to parse.
events %>% filter(is.na(document_date_clean_1)) %>% dplyr::select(starts_with("document_date")) %>% distinct() %>% print(n=40) #visualize errors
events$document_date_best_date <- events$document_date_clean_2
condition <- is.na(events$document_date_best_date)
events$document_date_best_date[condition] <- events$document_date_clean_1[condition]
(events$document_date_best_year <- lubridate::year(events$document_date_best_date)) %>% janitor::tabyl() %>% round(3)
    .    n percent valid_percent
 1952  101   0.010         0.011
 1953 3275   0.313         0.348
 1954 3751   0.358         0.398
 1955 1892   0.181         0.201
 1956  281   0.027         0.030
 1957   68   0.006         0.007
 1958   26   0.002         0.003
 1959    4   0.000         0.000
 1960    4   0.000         0.000
 1961    4   0.000         0.000
 2052   10   0.001         0.001
   NA 1053   0.101            NA

Type of Event

cat("\014")

events$type_clean <- stringr::str_trim(stringi::stri_trans_totitle(events$type))
events$type_clean_agglow <- events$type_clean %>%
  stringr::str_trim() %>%
  tolower() %>%
  forcats::fct_collapse( #removed Car dependency for forcats
             'desertion'='desertion',
             'escape'='escape',
             'abduction'=c('abduction','kidnapping','kidnap','kitnap','kindnap'),
             'assault'=c('assault','attack','assaulted','assaults','assualt','assult'),
             'murder'=c('murder','elimination','kidnap / murder',''),
             'arson'=c('arson','burn'),
             'cattle slashing'=c('slashed','stampede'),
             'vandalism'='vandalism',
             'theft'=c('theft','thefts','thet','missing','lost','entry'),
             'punishment'=c('confiscate','sentenced'),
             'rebel capture'=c('capture','captured'),
             'oathing'=c('oath','oathing','recruitment','recruited'),
             'contact'=c('contact','caontact','contacts','drove off','drive off','drove  off',
              'chased off','broke up oathing','ambush'),
             'patrol'=c('patrol','police and kpr patrol','sweep'),
             'screening'=c('screening','sreening'),
             'unclassified'=c('type')
)

Unknown levels in f: kidnapping, , recruitment, patrol

events$type_clean_agglow %>%
  janitor::tabyl(sort = TRUE) %>%
  janitor::adorn_crosstab(.,digits = 1)

Factor dat contains implicit NA, consider using forcats::fct_explicit_na‘janitor::adorn_crosstab’ is deprecated. Use ‘use the various adorn_ functions instead. See the “tabyl” vignette for examples.’ instead. See help(“Deprecated”)

Collapse Event Types

(events$type_clean_aggmed <- events$type_clean_agglow %>% forcats::fct_collapse(
                                 'physical violence'=c('abduction','assault','murder'),
                                 'property destruction'=c('vandalism','arson','cattle slashing'),
                                 'theft'=c('theft'),
                                 'security operations'=c('contact','screening','sreening','patrol','punishment'),
                                 'unclassified'=c('desertion','escape','unclassified')
                            )
 ) %>%
  janitor::tabyl(sort = TRUE) %>%
  janitor::adorn_crosstab(digits = 1)
Unknown levels in `f`: sreeningFactor `dat` contains implicit NA, consider using `forcats::fct_explicit_na`'janitor::adorn_crosstab' is deprecated.
Use 'use the various adorn_ functions instead.  See the "tabyl" vignette for examples.' instead.
See help("Deprecated")
NA
(events$type_clean_agghigh <- events$type_clean_aggmed %>% forcats::fct_collapse(
                                 'rebel activity'=c('oathing','physical violence','property destruction','theft'),
                                 'government activity'=c('rebel capture','security operations')
                            )
) %>%
  janitor::tabyl(sort = TRUE) %>%
  janitor::adorn_crosstab(digits = 1)
Factor `dat` contains implicit NA, consider using `forcats::fct_explicit_na`'janitor::adorn_crosstab' is deprecated.
Use 'use the various adorn_ functions instead.  See the "tabyl" vignette for examples.' instead.
See help("Deprecated")

Initiator of Event

Collapsed Initiators to just Rebels, Government, and Civilians

cat("\014")

initiator_target_master_clean <- list(
  
'ammunition'=c('ammunition'),
'explosives'=c('explosives', 'gelignite'),
'firearms'=c('firearms','arms', 'firearm', 'gun', 'pistol', 'rifle', 'ammunition', 'rifile', 'shotgun', 'verey pistol'),
'other weapons'=c('other weapons','axe','scabbard','weapons'),
'colonial authorities'=c('colonial authorities','councillor', 'district commissioner', 'district officer', 'forest ranger', 'game ranger', 'game warden', 'government', 'government employees', 'port authority', 'public works department', 'screening team' , 'do', 'govrnment', 'wakamba screening team', 'do munuga','african do','dcmeru', 'colonial authorities' ,'govtemployee' ),
'tribal authorities'=c('tribal authorities','chief', 'elders', 'headman' , 'chief chostram','chief eliud', "chief's sentry") ,
'private property'=c('private property','buildings', 'cattle dip', 'duka', 'farms', 'garage', 'homes','huts', 'hotel', 'land rover', 'lorry', 'market', 'office', 'oxcart', 'property',  'pump house', 'sawmill', 'shops', 'stores', 'tractor', 'vehicle', 'windmill' , "bullock's farm",'cattle boma','coffe trees','coffee trees', 'cuthouse','dairy farm','dip','house','household', 'houses','hut','instrument','labour camp post','labour huts','lorries','lucerne sheds','maize shamba', 'milk factory','pig sty','private property', 'property of civilians','shop','store','thika fishing camp','vehicles'),
'cash'=c('cash', 'funds', 'money' , "conductor's takings"),
'food'=c('banana', 'barley', 'bran', 'cabbage', 'coffee', 'corn', 'cream', 'crops', 'dairy', 'food', 'fruit', 'grain', 'honey', 'maize',  'meat', 'milk', 'oats', 'posho', 'potatoes', 'sugar', 'vegetable', 'wheat',
'food','food etc','food store','food stores','foodstuffs','fruits','grains', 'grains+cloth +money','green maize cobs','potato','potato store','potatos','skimmed milk','sugar cane','sugar maize','vegetables','vegitable garden', 'vegitables','wheat bags','wheat store','wheet','whisky') ,
'livestock'=c('beast', 'cattle', 'cow', 'herd', 'livestock', 'pig', 'sheep', 'steer', 'stock',
'animal', 'bulls','calf','calves','chicken','cows','donkey','goat','goats',
'head of cattle','head of cow','head of sheep','heifer','heifers',
'lamb','live stock','livestock','livestocks','masai herd','milk cow','ox','ox cart',
'oxen','ram','red poll cattle','shee','sheep or ox','steers','stocks' ),
'medicine'=c('medical supplies', 'medicine', 'm&b tablets', 'medicines'),
'supplies'=c('supplies','bags', 'bedding', 'blankets', 'books', 'charcoal', 'cloth', 'clothing', 'cooking utensils', 'cutlery', 'equipment', 'farm implements', 
'household items','instruments', 'iron', 'pails','petrol', 'provisions', 'oil', 'sacks', 'supplies', 'tarpaulin', 'thatch', 'timber', 
'tobacco', 'tools', 'uniforms', 'wire', 'wireless set', 'whiskey', 'articles','bag','battery','bucket','ciga','cigarettes','clothes',
'clothing etc','cloths','dairy item','dairy record book','goods', 'material','oil+tins','provisionv','railway uniforms','supplies', 'tarpaulian','typewriter','v- drive belts', 'gunny bags' ),
'church'=c('church'),
'infrastructure'=c('airstrip', 'bridges', 'half built village', 'roads', 'trenches', 'water tank','bridge', 'bridge broken', 'bridge damaged', 'infrastructure', 'milt property', 'miltproperty', 'prison camp','stn damaged'),
'school'=c('school', 'school','school building','school house','school property','schools'),
'home guard'=c('bg','kg','eg', 'guard','embu guard', 'farm guard', 'forest guard', 'home guard','ikandine guard', 'kathanjure guard', 'kijabe guard',
'kikuyu guard', 'masai guard', 'meru guard', 'nandi guard', 'nkubu guard', 'stock guard', 'tigoni guard','tp and eg patrol','hg','tp patrol','home guard patrol',
'm', 'm/g','m/g patrol','g', 'kathanjure hg','k g', 'ng', 'eg patrol', 'hg camp','hg leader','hg patrol','hg post','home','home guard','kg post'),
'arab combat units'=c('arab combat' , 'arab combat unit','arab combat units'),
'asian combat units'=c('asian combat', 'asian combat unit', 'asian combat team', 'second asian combat unit','asian combat units' ),
'kings african rifles'=c('kings african rifles','3 kar', '4 kar', '5 kar', '6 kar', '7 kar', '23 kar', '26 kar','k.a.r','k.p.r','k.a.r.', '5th k.a.r','5kar','5 k.a.r','4th kar','kar' ) ,
'british military'=c('british military', 'devonshire regiment','devons', 'field intelligence assistant', 'field intelligence officer', 'fio', 'gloucestershire regiment', 'glosters', 'lancashire fusiliers', 
"king's shropshire light infantry", 'royal east kent regiment', 'buffs', 'royal fusiliers', 'royal highland regiment','black watch', 'watch', 'royal inniskilling fusiliers', 'royal irish fusiliers', 'royal northumberland fusiliers', 'rnf','police and military', 'army' , 'lancashire fusilliers', 'sp company 1 royal innisks', '1 rnf', 'rif', 'ksli', 'inniskillings', 'fia','1 glosters', '1 bw', '1 buffs', 
'\"a\" company 1 royal innisks', '\"a\" company', 'royal fusilers', 'of devons','of 1 glosters', 'lanc fus', 'fusiliers', 'fio kruger','fios','a co devon','4 platoon support company',
'\"c\" company1 royal innisks','6 platoonsp company 1 royal innisks','1 lf', '\"c\" company', '\"d\" company','\"a\"','\"a\" company bw','buffs ambush','d company',
"d' force",'devens', 'c company','\"d\" force', 'army officer', 'british army officer', 'british military', 'buffs patrol', 'european officer', 'european soldiers', 'gloster patrol' ),
'kenya regiment'=c('kenya regiment','captain folliott’s team' , 'kr', 'kenreg', 'kenregg','kenya regiment sergeant', 'kenya regt','keniya regiment','kenya regiment private'),
 'military (generic)'=c('military (generic)', 'captain', 'company', 'military', 'army', 'military property', 'platoon', 'security forces', 'security force', 'coy', 'striking force' ,'sentry', 'non commissioned officers', 'patrol', 'sentrie', 'sgt white' ),
'psuedo gangs'=c('psuedo gangs','pseudo gang', 'pseudo team', 'trojan', 'psuedo gangs', 'trojan team' , 'tracker group', 'pseudo teams'),
'royal air force'=c('royal air force','raf', 'bombers', 'air strike', 'harvards', 'raf lincolns','flying squard'),
'paramilitary'=c('paramilitary','general service unit', 'gsu' ),
'cid'=c('cid'),
'kenya police'=c('kenya police', 'kp' , "kp constables' quarters", 'kpa' ),
'kenya police reserve'=c('kenya police reserve', 'kpr', 'kpr officers', 'reserve police officer', 'rpo' , 'rpos', 'police and k.p.r'),
'police (generic)'=c('police (generic)','constable', 'police', 'polce','policy party'),
'railway police'=c('railway police' ),
'special branch'=c('special branch', 'blue doctor team', 'special branch team', 'sb officers' ),
'tribal police'=c('githumu police', 'masai special constable', 'tribal police', 'tp' , 'tpeg','african constable', 'african costable', 'african special constable', 'tribal police'),
'tribal police reserve'=c('tribal police reserve', 'tpr')  ,
'communities'=c('communities','manyatta', 'fishing camp', 'sublocation', 'village', 'camp' , 'villages') ,
'detainees'=c('detainees', 'prisoner', 'prisoners'),
'suspected insurgents'=c('suspected insurgents','bandits', 'food foragers', 'gangs', 'gang', 'kiama kia muingi' , 'kkm', 'komerera' , 'mau mau', 'oath administrator', 'passive wing',
'rebels', 'suspects', 'terrorists','terrorosts','terrorist', 'gunman', 'terorist', 'gunmen', 'resistance group','resistance groups', 'oath administrater','oath administrators','passive wing members', 'resistance','suspect', 'suspected insurgents','terroist','terroists','terrost') ,
'civilians'=c('civilians','africans', 'children', 'civilian', 'driver', 'employees', 'evangelist', 'family', 'farm boys', 'girls', 'informer',
'kikuyu', 'laborour', 'loyalist', 'masai', 'men', 'mission staff', 'owner', 'passengers', 'people',  'tugen tribesmen' , 'stranger', 'sikh',
'herd boys', 'isiolo game scouts', 'farm labour', 'farmer', 'european', 'employer', 'employee', 'civilan','shopkeeper' , 'students', 'teachers',
'turkana', 'vigilantes', 'women', 'workers','villagers',  'labour', 'local labour', 'kikuyus', 'embu', 'tiriki houseboy', 'samburu', 'manager', 'woman',
'vetofficer', 'mrhiggins', 'masai party','kuria tribesmen','manager of akira estates', 'kuria tribesmen','chstephen','african',
'catholic misson staff', 'african staff', 'asian women', 'bus conductor', 'child', 'civilian(food carriers)', 'civilian(schoolmaster)', 'civilians',
'civilion', 'committee', 'committee member',  'courier','elder','embu tractor driver', 'employees of club','engine boy','girl','golf club staff','his own hut',
'hotel keeper','houseboy','illegal residents','indian','interpreter','kem','kikiyu', 'kikuyu assessor','kikuyu families','kikuyu houseboy','kikuyu labourer','kikyu',
'kirua village','labour line','labour lines','labourer','labourers', 'laboures','labourline','labours','males','man','maragoli','maragoli labourer',
'masai elders','masai tribesman','members of the thika committee', 'mna section leaders','municipal inspectors','non kikuyu employees','person',
'prostitutes','purke masai','pwd employee','railway employees', 'school master','school teacher','sisters committee','somali','staff','strangers',
'taxi drivers','teacher','treasurers', "headman's son","norton traill's labour","gordon's labour", 'food carriers') 
)
regex <- "\\.|patrol|[1-9]\\s*rd|[1-9]\\s*th" # with regex start trying to get more of these to automatically map instead of generating lots of hand codings
events$initiator_clean <- events$initiator %>% stringr::str_trim() %>% gsub(regex, "", ., ignore.case =T) %>% tolower()
events <- events %>%
  dplyr::select(-one_of("initiator_clean_1", "initiator_clean_2", "initiator_clean_3")) %>% # separate will continue to add columns every time its run
  tidyr ::separate(
    col = initiator_clean,
    into = c("initiator_clean_1", "initiator_clean_2", "initiator_clean_3"),
    sep = "and|\\\\|/|\\&|,", remove = F, extra = "drop", fill = "right"
  )
Unknown columns: `initiator_clean_1`, `initiator_clean_2`, `initiator_clean_3`
events <- events %>%
  mutate_at(vars(starts_with("initiator_clean_")), funs(gsub(".*police.*", "police", ., ignore.case =T))) %>%
  mutate_at(vars(starts_with("initiator_clean_")), funs(gsub(".*guard.*", "guard", ., ignore.case =T))) %>%
  mutate_at(vars(starts_with("initiator_clean_")), funs(gsub(".*terror.*|.*mau mau.*|.*gang.*", "terrorist", ., ignore.case =T))) %>%
  mutate_at(vars(starts_with("initiator_clean_")), funs(gsub(".*kpr.*|.*k p r.*", "kpr", ., ignore.case =T))) %>%
  mutate_at(vars(starts_with("initiator_clean_")), funs(gsub(".*kar.*|.*k a r.*", "kar", ., ignore.case =T))) %>%
  mutate_at(vars(starts_with("initiator_clean_")), funs(gsub(".*coy.*", "coy", ., ignore.case =T))) %>%
  mutate_at(vars(starts_with("initiator_clean_")), funs(gsub(".*gsu.*", "gsu", ., ignore.case =T))) %>%
  mutate_at(vars(starts_with("initiator_clean_")), funs(gsub(".*watch.*", "watch", ., ignore.case =T))) %>%
  mutate_at(vars(starts_with("initiator_clean_")), funs(trimws(.)))
events <- events %>% 
          mutate(initiator_clean_1_agglow = do.call(forcats::fct_collapse, c(list(events$initiator_clean_1), initiator_target_master_clean)) %>% as.character() ) %>%
          mutate(initiator_clean_2_agglow = do.call(forcats::fct_collapse, c(list(events$initiator_clean_2), initiator_target_master_clean)) %>% as.character()) %>%
          mutate(initiator_clean_3_agglow = do.call(forcats::fct_collapse, c(list(events$initiator_clean_3), initiator_target_master_clean)) %>% as.character())
Unknown levels in `f`: ammunition, explosives, gelignite, firearms, arms, firearm, gun, pistol, rifle, ammunition, rifile, shotgun, verey pistol, other weapons, axe, scabbard, weapons, colonial authorities, councillor, district commissioner, district officer, government employees, port authority, public works department, colonial authorities, govtemployee, tribal authorities, elders, chief's sentry, private property, buildings, cattle dip, duka, farms, garage, homes, huts, hotel, land rover, lorry, market, office, oxcart, property, pump house, sawmill, shops, stores, tractor, vehicle, windmill, bullock's farm, cattle boma, coffe trees, coffee trees, cuthouse, dairy farm, dip, house, household, houses, hut, instrument, labour camp post, labour huts, lorries, lucerne sheds, maize shamba, milk factory, pig sty, private property, property of civilians, shop, store, thika fishing camp, vehicles, cash, funds, money, conductor's takings, banana, barley, bran, cabbage, coffee, corn, cream, crops, dairy, food, fruit, grain, honey, maize, meat, milk, oats, posho, potatoes, sugar, vegetable, wheat, food, food etc, food store, food stores, foodstuffs, fruits, grains, grains+cloth +money, green maize cobs, potato, potato store, potatos, skimmed milk, sugar cane, sugar maize, vegetables, vegitable garden, vegitables, wheat bags, wheat store, wheet, whisky, beast, cattle, cow, herd, livestock, pig, sheep, steer, stock, animal, bulls, calf, calves, chicken, cows, donkey, goat, goats, head of cattle, head of cow, head of sheep, heifer, heifers, lamb, live stock, livestock, livestocks, masai herd, milk cow, ox, ox cart, oxen, ram, red poll cattle, shee, sheep or ox, steers, stocks, medical supplies, medicine, m&b tablets, medicines, supplies, bags, bedding, blankets, books, charcoal, cloth, clothing, cooking utensils, cutlery, equipment, farm implements, household items, instruments, iron, pails, petrol, provisions, oil, sacks, supplies, tarpaulin, thatch, timber, tobacco, tools, uniforms, wire, wireless set, whiskey, articles, bag, battery, bucket, ciga, cigarettes, clothes, clothing etc, cloths, dairy item, dairy record book, goods, material, oil+tins, provisionv, railway uniforms, supplies, tarpaulian, typewriter, v- drive belts, gunny bags, church, airstrip, bridges, half built village, roads, trenches, water tank, bridge, bridge broken, bridge damaged, infrastructure, milt property, miltproperty, prison camp, stn damaged, school, school, school building, school house, school property, schools, embu guard, farm guard, forest guard, home guard, ikandine guard, kathanjure guard, kijabe guard, kikuyu guard, masai guard, meru guard, nandi guard, nkubu guard, stock guard, tigoni guard, tp and eg patrol, tp patrol, home guard patrol, m/g, m/g patrol, g, eg patrol, hg camp, hg leader, hg patrol, hg post, home, home guard, kg post, arab combat, arab combat units, asian combat, asian combat units, kings african rifles, 3 kar, 4 kar, 5 kar, 6 kar, 7 kar, 23 kar, 26 kar, k.a.r, k.p.r, k.a.r., 5th k.a.r, 5kar, 5 k.a.r, 4th kar, british military, devonshire regiment, field intelligence assistant, field intelligence officer, gloucestershire regiment, glosters, king's shropshire light infantry, royal east kent regiment, royal fusiliers, royal highland regiment, black watch, royal inniskilling fusiliers, royal northumberland fusiliers, rnf, police and military, army officer, british army officer, british military, buffs patrol, european officer, european soldiers, gloster patrol, captain folliott’s team, kenya regiment private, military (generic), captain, military property, platoon, non commissioned officers, patrol, sentrie, sgt white, psuedo gangs, pseudo gang, psuedo gangs, royal air force, paramilitary, kenya police, kp, kp constables' quarters, kpa, kenya police reserve, kpr officers, reserve police officer, police and k.p.r, police (generic), railway police, githumu police, masai special constable, tribal police, african constable, african costable, african special constable, tribal police, tribal police reserve, communities, manyatta, fishing camp, sublocation, village, camp, villages, detainees, prisoner, prisoners, suspected insurgents, bandits, food foragers, gangs, gang, kiama kia muingi, kkm, komerera, mau mau, oath administrator, passive wing, suspects, terrorists, terrorosts, oath administrater, oath administrators, passive wing members, resistance, suspect, suspected insurgents, terroist, terroists, terrost, africans, children, driver, employees, evangelist, family, farm boys, girls, informer, laborour, loyalist, mission staff, passengers, people, shopkeeper, students, teachers, vigilantes, women, workers, catholic misson staff, african staff, asian women, bus conductor, child, civilian(food carriers), civilian(schoolmaster), civilion, committee, committee member, courier, elder, embu tractor driver, employees of club, engine boy, girl, golf club staff, his own hut, hotel keeper, houseboy, illegal residents, indian, interpreter, kem, kikiyu, kikuyu assessor, kikuyu families, kikuyu houseboy, kikuyu labourer, kikyu, kirua village, labour line, labour lines, labourer, labourers, laboures, labourline, labours, males, man, maragoli, maragoli labourer, masai elders, masai tribesman, members of the thika committee, mna section leaders, municipal inspectors, non kikuyu employees, person, prostitutes, purke masai, pwd employee, railway employees, school master, school teacher, sisters committee, somali, staff, strangers, taxi drivers, teacher, treasurers, headman's son, gordon's labour, food carriersUnknown levels in `f`: ammunition, explosives, gelignite, firearms, arms, firearm, gun, pistol, rifle, ammunition, rifile, shotgun, verey pistol, other weapons, axe, scabbard, weapons, colonial authorities, councillor, district commissioner, district officer, forest ranger, game ranger, game warden, government employees, port authority, public works department, screening team, govrnment, wakamba screening team, do munuga, african do, dcmeru, colonial authorities, govtemployee, tribal authorities, elders, headman, chief chostram, chief eliud, chief's sentry, private property, buildings, cattle dip, duka, farms, garage, homes, huts, hotel, land rover, lorry, market, office, oxcart, property, pump house, sawmill, shops, stores, tractor, vehicle, windmill, bullock's farm, cattle boma, coffe trees, coffee trees, cuthouse, dairy farm, dip, house, household, houses, hut, instrument, labour camp post, labour huts, lorries, lucerne sheds, maize shamba, milk factory, pig sty, private property, property of civilians, shop, store, thika fishing camp, vehicles, cash, funds, money, conductor's takings, banana, barley, bran, cabbage, coffee, corn, cream, crops, dairy, food, fruit, grain, honey, maize, meat, milk, oats, posho, potatoes, sugar, vegetable, wheat, food, food etc, food store, food stores, foodstuffs, fruits, grains, grains+cloth +money, green maize cobs, potato, potato store, potatos, skimmed milk, sugar cane, sugar maize, vegetables, vegitable garden, vegitables, wheat bags, wheat store, wheet, whisky, beast, cattle, cow, herd, livestock, pig, sheep, steer, stock, animal, bulls, calf, calves, chicken, cows, donkey, goat, goats, head of cattle, head of cow, head of sheep, heifer, heifers, lamb, live stock, livestock, livestocks, masai herd, milk cow, ox, ox cart, oxen, ram, red poll cattle, shee, sheep or ox, steers, stocks, medical supplies, medicine, m&b tablets, medicines, supplies, bags, bedding, blankets, books, charcoal, cloth, clothing, cooking utensils, cutlery, equipment, farm implements, household items, instruments, iron, pails, petrol, provisions, oil, sacks, supplies, tarpaulin, thatch, timber, tobacco, tools, uniforms, wire, wireless set, whiskey, articles, bag, battery, bucket, ciga, cigarettes, clothes, clothing etc, cloths, dairy item, dairy record book, goods, material, oil+tins, provisionv, railway uniforms, supplies, tarpaulian, typewriter, v- drive belts, gunny bags, church, airstrip, bridges, half built village, roads, trenches, water tank, bridge, bridge broken, bridge damaged, infrastructure, milt property, miltproperty, prison camp, stn damaged, school, school, school building, school house, school property, schools, bg, embu guard, farm guard, forest guard, home guard, ikandine guard, kathanjure guard, kijabe guard, kikuyu guard, masai guard, meru guard, nandi guard, nkubu guard, stock guard, tigoni guard, tp and eg patrol, tp patrol, home guard patrol, m/g, m/g patrol, kathanjure hg, k g, ng, eg patrol, hg camp, hg leader, hg patrol, hg post, home, home guard, kg post, arab combat, arab combat unit, arab combat units, asian combat, asian combat team, second asian combat unit, asian combat units, kings african rifles, 3 kar, 4 kar, 5 kar, 6 kar, 7 kar, 23 kar, 26 kar, k.a.r, k.p.r, k.a.r., 5th k.a.r, 5kar, 5 k.a.r, 4th kar, british military, devonshire regiment, devons, field intelligence assistant, field intelligence officer, fio, gloucestershire regiment, glosters, lancashire fusiliers, king's shropshire light infantry, royal east kent regiment, royal fusiliers, royal highland regiment, black watch, royal inniskilling fusiliers, royal irish fusiliers, royal northumberland fusiliers, rnf, police and military, lancashire fusilliers, sp company 1 royal innisks, 1 rnf, rif, ksli, inniskillings, 1 glosters, 1 bw, 1 buffs, "a" company 1 royal innisks, "a" company, royal fusilers, of devons, of 1 glosters, lanc fus, fusiliers, fio kruger, fios, a co devon, 4 platoon support company, "c" company1 royal innisks, 6 platoonsp company 1 royal innisks, 1 lf, "c" company, "d" company, "a", "a" company bw, buffs ambush, d company, d' force, devens, c company, "d" force, army officer, british army officer, british military, buffs patrol, european officer, european soldiers, gloster patrol, captain folliott’s team, kenreg, kenregg, kenya regiment sergeant, keniya regiment, kenya regiment private, military (generic), captain, company, military property, platoon, security forces, security force, striking force, sentry, non commissioned officers, patrol, sentrie, sgt white, psuedo gangs, pseudo gang, pseudo team, trojan, psuedo gangs, trojan team, tracker group, pseudo teams, royal air force, raf, bombers, air strike, harvards, raf lincolns, flying squard, paramilitary, general service unit, cid, kenya police, kp constables' quarters, kpa, kenya police reserve, kpr officers, reserve police officer, rpo, rpos, police and k.p.r, police (generic), constable, polce, policy party, railway police, special branch, blue doctor team, special branch team, sb officers, githumu police, masai special constable, tribal police, tpeg, african constable, african costable, african special constable, tribal police, tribal police reserve, communities, manyatta, fishing camp, sublocation, village, camp, villages, detainees, prisoner, prisoners, suspected insurgents, bandits, food foragers, gangs, gang, kiama kia muingi, kkm, komerera, mau mau, oath administrator, passive wing, rebels, suspects, terrorists, terrorosts, gunman, terorist, gunmen, resistance group, resistance groups, oath administrater, oath administrators, passive wing members, resistance, suspect, suspected insurgents, terroist, terroists, terrost, africans, children, civilian, driver, employees, evangelist, family, farm boys, girls, informer, kikuyu, laborour, loyalist, masai, men, mission staff, owner, passengers, people, tugen tribesmen, stranger, sikh, herd boys, isiolo game scouts, farm labour, farmer, european, employer, employee, civilan, shopkeeper, students, teachers, turkana, vigilantes, women, workers, villagers, local labour, kikuyus, embu, tiriki houseboy, samburu, manager, woman, vetofficer, mrhiggins, masai party, kuria tribesmen, manager of akira estates, kuria tribesmen, chstephen, african, catholic misson staff, african staff, asian women, bus conductor, child, civilian(food carriers), civilian(schoolmaster), civilion, committee, committee member, courier, elder, embu tractor driver, employees of club, engine boy, girl, golf club staff, his own hut, hotel keeper, houseboy, illegal residents, indian, interpreter, kem, kikiyu, kikuyu assessor, kikuyu families, kikuyu houseboy, kikuyu labourer, kikyu, kirua village, labour line, labour lines, labourer, labourers, laboures, labourline, labours, males, man, maragoli, maragoli labourer, masai elders, masai tribesman, members of the thika committee, mna section leaders, municipal inspectors, non kikuyu employees, person, prostitutes, purke masai, pwd employee, railway employees, school master, school teacher, sisters committee, somali, staff, strangers, taxi drivers, teacher, treasurers, headman's son, norton traill's labour, gordon's labour, food carriersUnknown levels in `f`: ammunition, explosives, gelignite, firearms, arms, firearm, gun, pistol, rifle, ammunition, rifile, shotgun, verey pistol, other weapons, axe, scabbard, weapons, colonial authorities, councillor, district commissioner, district officer, forest ranger, game ranger, game warden, government, government employees, port authority, public works department, screening team, do, govrnment, wakamba screening team, do munuga, african do, dcmeru, colonial authorities, govtemployee, tribal authorities, chief, elders, headman, chief chostram, chief eliud, chief's sentry, private property, buildings, cattle dip, duka, farms, garage, homes, huts, hotel, land rover, lorry, market, office, oxcart, property, pump house, sawmill, shops, stores, tractor, vehicle, windmill, bullock's farm, cattle boma, coffe trees, coffee trees, cuthouse, dairy farm, dip, house, household, houses, hut, instrument, labour camp post, labour huts, lorries, lucerne sheds, maize shamba, milk factory, pig sty, private property, property of civilians, shop, store, thika fishing camp, vehicles, cash, funds, money, conductor's takings, banana, barley, bran, cabbage, coffee, corn, cream, crops, dairy, food, fruit, grain, honey, maize, meat, milk, oats, posho, potatoes, sugar, vegetable, wheat, food, food etc, food store, food stores, foodstuffs, fruits, grains, grains+cloth +money, green maize cobs, potato, potato store, potatos, skimmed milk, sugar cane, sugar maize, vegetables, vegitable garden, vegitables, wheat bags, wheat store, wheet, whisky, beast, cattle, cow, herd, livestock, pig, sheep, steer, stock, animal, bulls, calf, calves, chicken, cows, donkey, goat, goats, head of cattle, head of cow, head of sheep, heifer, heifers, lamb, live stock, livestock, livestocks, masai herd, milk cow, ox, ox cart, oxen, ram, red poll cattle, shee, sheep or ox, steers, stocks, medical supplies, medicine, m&b tablets, medicines, supplies, bags, bedding, blankets, books, charcoal, cloth, clothing, cooking utensils, cutlery, equipment, farm implements, household items, instruments, iron, pails, petrol, provisions, oil, sacks, supplies, tarpaulin, thatch, timber, tobacco, tools, uniforms, wire, wireless set, whiskey, articles, bag, battery, bucket, ciga, cigarettes, clothes, clothing etc, cloths, dairy item, dairy record book, goods, material, oil+tins, provisionv, railway uniforms, supplies, tarpaulian, typewriter, v- drive belts, gunny bags, church, airstrip, bridges, half built village, roads, trenches, water tank, bridge, bridge broken, bridge damaged, infrastructure, milt property, miltproperty, prison camp, stn damaged, school, school, school building, school house, school property, schools, bg, eg, embu guard, farm guard, forest guard, home guard, ikandine guard, kathanjure guard, kijabe guard, kikuyu guard, masai guard, meru guard, nandi guard, nkubu guard, stock guard, tigoni guard, tp and eg patrol, tp patrol, home guard patrol, m, m/g, m/g patrol, g, kathanjure hg, k g, ng, eg patrol, hg camp, hg leader, hg patrol, hg post, home, home guard, kg post, arab combat, arab combat unit, arab combat units, asian combat, asian combat unit, asian combat team, second asian combat unit, asian combat units, kings african rifles, 3 kar, 4 kar, 5 kar, 6 kar, 7 kar, 23 kar, 26 kar, k.a.r, k.p.r, k.a.r., 5th k.a.r, 5kar, 5 k.a.r, 4th kar, british military, devonshire regiment, devons, field intelligence assistant, field intelligence officer, fio, gloucestershire regiment, glosters, lancashire fusiliers, king's shropshire light infantry, royal east kent regiment, buffs, royal fusiliers, royal highland regiment, black watch, watch, royal inniskilling fusiliers, royal irish fusiliers, royal northumberland fusiliers, rnf, police and military, army, lancashire fusilliers, sp company 1 royal innisks, 1 rnf, rif, ksli, inniskillings, 1 glosters, 1 bw, 1 buffs, "a" company 1 royal innisks, "a" company, royal fusilers, of devons, of 1 glosters, lanc fus, fusiliers, fio kruger, fios, a co devon, 4 platoon support company, "c" company1 royal innisks, 6 platoonsp company 1 royal innisks, 1 lf, "c" company, "d" company, "a", "a" company bw, buffs ambush, d company, d' force, devens, c company, "d" force, army officer, british army officer, british military, buffs patrol, european officer, european soldiers, gloster patrol, kenya regiment, captain folliott’s team, kr, kenreg, kenregg, kenya regiment sergeant, kenya regt, keniya regiment, kenya regiment private, military (generic), captain, company, army, military property, platoon, security forces, security force, coy, striking force, sentry, non commissioned officers, patrol, sentrie, sgt white, psuedo gangs, pseudo gang, pseudo team, trojan, psuedo gangs, trojan team, tracker group, pseudo teams, royal air force, raf, bombers, air strike, harvards, raf lincolns, flying squard, paramilitary, general service unit, gsu, cid, kenya police, kp, kp constables' quarters, kpa, kenya police reserve, kpr officers, reserve police officer, rpo, rpos, police and k.p.r, police (generic), constable, polce, policy party, railway police, special branch, blue doctor team, special branch team, sb officers, githumu police, masai special constable, tribal police, tpeg, african constable, african costable, african special constable, tribal police, tribal police reserve, tpr, communities, manyatta, fishing camp, sublocation, village, camp, villages, detainees, prisoner, prisoners, suspected insurgents, bandits, food foragers, gangs, gang, kiama kia muingi, kkm, komerera, mau mau, oath administrator, passive wing, rebels, suspects, terrorists, terrorosts, terrorist, gunman, terorist, gunmen, resistance group, resistance groups, oath administrater, oath administrators, passive wing members, resistance, suspect, suspected insurgents, terroist, terroists, terrost, africans, children, civilian, driver, employees, evangelist, family, farm boys, girls, informer, laborour, loyalist, masai, men, mission staff, owner, passengers, people, tugen tribesmen, stranger, sikh, herd boys, isiolo game scouts, farm labour, farmer, european, employer, employee, civilan, shopkeeper, students, teachers, turkana, vigilantes, women, workers, villagers, labour, local labour, kikuyus, embu, tiriki houseboy, samburu, manager, woman, vetofficer, mrhiggins, masai party, kuria tribesmen, manager of akira estates, kuria tribesmen, chstephen, african, catholic misson staff, african staff, asian women, bus conductor, child, civilian(food carriers), civilian(schoolmaster), civilion, committee, committee member, courier, elder, embu tractor driver, employees of club, engine boy, girl, golf club staff, his own hut, hotel keeper, houseboy, illegal residents, indian, interpreter, kem, kikiyu, kikuyu assessor, kikuyu families, kikuyu houseboy, kikuyu labourer, kikyu, kirua village, labour line, labour lines, labourer, labourers, laboures, labourline, labours, males, man, maragoli, maragoli labourer, masai elders, masai tribesman, members of the thika committee, mna section leaders, municipal inspectors, non kikuyu employees, person, prostitutes, purke masai, pwd employee, railway employees, school master, school teacher, sisters committee, somali, staff, strangers, taxi drivers, teacher, treasurers, headman's son, norton traill's labour, gordon's labour, food carriers
#https://community.rstudio.com/t/using-list-argument-with-fct-collapse/6552/7
# sort(table(events$initiator_clean_1_agglow))
lowlevelagg <- c(
  "arab combat units", "cid", "psuedo gangs", "asian combat units", "special branch",
  "tribal authorities", "tribal police reserve", "royal air force",
  "paramilitary", "kenya regiment", "tribal police", "kenya police reserve", "kenya police",
  "british military", "civilians", "Kings African Rifles", "military (generic)", "police (generic)",
  "railway police", "home guard", "colonial authorities", "suspected insurgents"
)
#events <- events %>%
#  mutate(initiator_clean_1_agglow=ifelse(initiator_clean_1_agglow  %in% lowlevelagg & !is.na(initiator_clean_1_agglow),initiator_clean_1_agglow, "uncategorized")) %>%
#  mutate(initiator_clean_2_agglow=ifelse(initiator_clean_2_agglow  %in% lowlevelagg & !is.na(initiator_clean_2_agglow),initiator_clean_2_agglow, "uncategorized")) %>%  
#  mutate(initiator_clean_3_agglow=ifelse(initiator_clean_3_agglow  %in% lowlevelagg & !is.na(initiator_clean_3_agglow),initiator_clean_3_agglow, "uncategorized"))
# table(events$initiator_clean_1_agglow, useNA="always")

events[, c("initiator_clean_1_aggmed", "initiator_clean_2_aggmed", "initiator_clean_3_aggmed")] <-
  events[, c("initiator_clean_1_agglow", "initiator_clean_2_agglow", "initiator_clean_3_agglow")]
events <- events %>%
  mutate_at(
    vars(starts_with("initiator_clean_1_aggmed|initiator_clean_2_aggmed|initiator_clean_3_aggmed")),
    .funs = funs(forcats::fct_collapse(
     'police'=c('cid','kenya police reserve','kenya police','police (generic)','railway police','special branch','tribal police','tribal police reserve') ,
     'military'=c('arab combat units','asian combat units','british military','kings african rifles','kenya regiment','military (generic)','psuedo gangs','royal air force') , 
     'civil authorities'=c('colonial authorities', 'tribal authorities')
     ))
  )

events$initiator_clean_2_aggmed %>%
  janitor::tabyl(sort = TRUE) %>%
  janitor::adorn_crosstab(digits = 1)

events[, c("initiator_clean_1_agghigh", "initiator_clean_2_agghigh", "initiator_clean_3_agghigh")] <-
  events[, c("initiator_clean_1_aggmed", "initiator_clean_2_aggmed", "initiator_clean_3_aggmed")]
events <- events %>%
  mutate_at(
    vars(starts_with("initiator_clean_1_agghigh|initiator_clean_2_agghigh|initiator_clean_3_agghigh")),
    .funs = funs(forcats::fct_collapse(
                  'government'=c('civil authorities', 'home guard', 'military', 'police', 'paramilitary') ,
                  'rebels'=c('suspected insurgents') 
          ))
  )

events$initiator_clean_3_agghigh %>%
  janitor::tabyl(sort = TRUE) %>%
  janitor::adorn_crosstab(digits = 1)

Target of Event

regex <- "\\.|patrol|[1-9]\\s*rd|[1-9]\\s*th" # with regex start trying to get more of these to automatically map instead of generating lots of hand codings
events$target_clean <- events$initiator %>% stringr::str_trim() %>% tolower() %>% gsub(regex, "", .)
events <- events %>%
  dplyr::select(-one_of("target_clean_1", "target_clean_2", "target_clean_3")) %>% # separate will continue to add columns every time its run so drop old versions. First time this is run will throw a warning.
  tidyr ::separate(
    col = initiator_clean,
    into = c("target_clean_1", "target_clean_2", "target_clean_3"),
    sep = "and|\\\\|/|\\&|,", remove = F, extra = "drop", fill = "right"
  )
Unknown columns: `target_clean_1`, `target_clean_2`, `target_clean_3`
events <- events %>% 
  mutate_at(vars(starts_with("target_clean_")), funs(gsub(".*terror.*|.*erori.*|.*erroris*|.*mau mau.*|.*gang.*", "terrorist", ., ignore.case =T))  )  %>%
  mutate_at(vars(starts_with("target_clean_")), funs(gsub(".*police.*", "police", ., ignore.case =T))) %>%
  mutate_at(vars(starts_with("target_clean_")), funs(gsub(".*guard.*", "guard", ., ignore.case =T))) %>%
  mutate_at(vars(starts_with("target_clean_")), funs(gsub(".*kpr.*|.*k p r.*", "kpr", ., ignore.case =T))) %>%
  mutate_at(vars(starts_with("target_clean_")), funs(gsub(".*kar.*|.*k a r.*", "kar", ., ignore.case =T))) %>%
  mutate_at(vars(starts_with("target_clean_")), funs(gsub(".*coy.*", "coy", ., ignore.case =T))) %>%
  mutate_at(vars(starts_with("target_clean_")), funs(gsub(".*gsu.*", "gsu", ., ignore.case =T))) %>%
  mutate_at(vars(starts_with("target_clean_")), funs(gsub(".*watch.*", "watch", ., ignore.case =T))) %>%
  mutate_at(vars(starts_with("target_clean_")), funs(trimws(.)))
events$target_clean_1 %>%
  janitor::tabyl(sort = TRUE) %>%
  janitor::adorn_crosstab(digits = 1)
'janitor::adorn_crosstab' is deprecated.
Use 'use the various adorn_ functions instead.  See the "tabyl" vignette for examples.' instead.
See help("Deprecated")
events <- events %>% 
          mutate(target_clean_1_agglow = do.call(forcats::fct_collapse, c(list(events$target_clean_1), initiator_target_master_clean)) %>% as.character() ) %>%
          mutate(target_clean_2_agglow = do.call(forcats::fct_collapse, c(list(events$target_clean_2), initiator_target_master_clean)) %>% as.character()) %>%
          mutate(target_clean_3_agglow = do.call(forcats::fct_collapse, c(list(events$target_clean_3), initiator_target_master_clean)) %>% as.character())
Unknown levels in `f`: ammunition, explosives, gelignite, firearms, arms, firearm, gun, pistol, rifle, ammunition, rifile, shotgun, verey pistol, other weapons, axe, scabbard, weapons, colonial authorities, councillor, district commissioner, district officer, government employees, port authority, public works department, colonial authorities, govtemployee, tribal authorities, elders, chief's sentry, private property, buildings, cattle dip, duka, farms, garage, homes, huts, hotel, land rover, lorry, market, office, oxcart, property, pump house, sawmill, shops, stores, tractor, vehicle, windmill, bullock's farm, cattle boma, coffe trees, coffee trees, cuthouse, dairy farm, dip, house, household, houses, hut, instrument, labour camp post, labour huts, lorries, lucerne sheds, maize shamba, milk factory, pig sty, private property, property of civilians, shop, store, thika fishing camp, vehicles, cash, funds, money, conductor's takings, banana, barley, bran, cabbage, coffee, corn, cream, crops, dairy, food, fruit, grain, honey, maize, meat, milk, oats, posho, potatoes, sugar, vegetable, wheat, food, food etc, food store, food stores, foodstuffs, fruits, grains, grains+cloth +money, green maize cobs, potato, potato store, potatos, skimmed milk, sugar cane, sugar maize, vegetables, vegitable garden, vegitables, wheat bags, wheat store, wheet, whisky, beast, cattle, cow, herd, livestock, pig, sheep, steer, stock, animal, bulls, calf, calves, chicken, cows, donkey, goat, goats, head of cattle, head of cow, head of sheep, heifer, heifers, lamb, live stock, livestock, livestocks, masai herd, milk cow, ox, ox cart, oxen, ram, red poll cattle, shee, sheep or ox, steers, stocks, medical supplies, medicine, m&b tablets, medicines, supplies, bags, bedding, blankets, books, charcoal, cloth, clothing, cooking utensils, cutlery, equipment, farm implements, household items, instruments, iron, pails, petrol, provisions, oil, sacks, supplies, tarpaulin, thatch, timber, tobacco, tools, uniforms, wire, wireless set, whiskey, articles, bag, battery, bucket, ciga, cigarettes, clothes, clothing etc, cloths, dairy item, dairy record book, goods, material, oil+tins, provisionv, railway uniforms, supplies, tarpaulian, typewriter, v- drive belts, gunny bags, church, airstrip, bridges, half built village, roads, trenches, water tank, bridge, bridge broken, bridge damaged, infrastructure, milt property, miltproperty, prison camp, stn damaged, school, school, school building, school house, school property, schools, embu guard, farm guard, forest guard, home guard, ikandine guard, kathanjure guard, kijabe guard, kikuyu guard, masai guard, meru guard, nandi guard, nkubu guard, stock guard, tigoni guard, tp and eg patrol, tp patrol, home guard patrol, m/g, m/g patrol, g, eg patrol, hg camp, hg leader, hg patrol, hg post, home, home guard, kg post, arab combat, arab combat units, asian combat, asian combat units, kings african rifles, 3 kar, 4 kar, 5 kar, 6 kar, 7 kar, 23 kar, 26 kar, k.a.r, k.p.r, k.a.r., 5th k.a.r, 5kar, 5 k.a.r, 4th kar, british military, devonshire regiment, field intelligence assistant, field intelligence officer, gloucestershire regiment, glosters, king's shropshire light infantry, royal east kent regiment, royal fusiliers, royal highland regiment, black watch, royal inniskilling fusiliers, royal northumberland fusiliers, rnf, police and military, army officer, british army officer, british military, buffs patrol, european officer, european soldiers, gloster patrol, captain folliott’s team, kenya regiment private, military (generic), captain, military property, platoon, non commissioned officers, patrol, sentrie, sgt white, psuedo gangs, pseudo gang, psuedo gangs, royal air force, paramilitary, kenya police, kp, kp constables' quarters, kpa, kenya police reserve, kpr officers, reserve police officer, police and k.p.r, police (generic), railway police, githumu police, masai special constable, tribal police, african constable, african costable, african special constable, tribal police, tribal police reserve, communities, manyatta, fishing camp, sublocation, village, camp, villages, detainees, prisoner, prisoners, suspected insurgents, bandits, food foragers, gangs, gang, kiama kia muingi, kkm, komerera, mau mau, oath administrator, passive wing, suspects, terrorists, terrorosts, terorist, oath administrater, oath administrators, passive wing members, resistance, suspect, suspected insurgents, terroist, terroists, terrost, africans, children, driver, employees, evangelist, family, farm boys, girls, informer, laborour, loyalist, mission staff, passengers, people, shopkeeper, students, teachers, vigilantes, women, workers, catholic misson staff, african staff, asian women, bus conductor, child, civilian(food carriers), civilian(schoolmaster), civilion, committee, committee member, courier, elder, embu tractor driver, employees of club, engine boy, girl, golf club staff, his own hut, hotel keeper, houseboy, illegal residents, indian, interpreter, kem, kikiyu, kikuyu assessor, kikuyu families, kikuyu houseboy, kikuyu labourer, kikyu, kirua village, labour line, labour lines, labourer, labourers, laboures, labourline, labours, males, man, maragoli, maragoli labourer, masai elders, masai tribesman, members of the thika committee, mna section leaders, municipal inspectors, non kikuyu employees, person, prostitutes, purke masai, pwd employee, railway employees, school master, school teacher, sisters committee, somali, staff, strangers, taxi drivers, teacher, treasurers, headman's son, gordon's labour, food carriersUnknown levels in `f`: ammunition, explosives, gelignite, firearms, arms, firearm, gun, pistol, rifle, ammunition, rifile, shotgun, verey pistol, other weapons, axe, scabbard, weapons, colonial authorities, councillor, district commissioner, district officer, forest ranger, game ranger, game warden, government employees, port authority, public works department, screening team, govrnment, wakamba screening team, do munuga, african do, dcmeru, colonial authorities, govtemployee, tribal authorities, elders, headman, chief chostram, chief eliud, chief's sentry, private property, buildings, cattle dip, duka, farms, garage, homes, huts, hotel, land rover, lorry, market, office, oxcart, property, pump house, sawmill, shops, stores, tractor, vehicle, windmill, bullock's farm, cattle boma, coffe trees, coffee trees, cuthouse, dairy farm, dip, house, household, houses, hut, instrument, labour camp post, labour huts, lorries, lucerne sheds, maize shamba, milk factory, pig sty, private property, property of civilians, shop, store, thika fishing camp, vehicles, cash, funds, money, conductor's takings, banana, barley, bran, cabbage, coffee, corn, cream, crops, dairy, food, fruit, grain, honey, maize, meat, milk, oats, posho, potatoes, sugar, vegetable, wheat, food, food etc, food store, food stores, foodstuffs, fruits, grains, grains+cloth +money, green maize cobs, potato, potato store, potatos, skimmed milk, sugar cane, sugar maize, vegetables, vegitable garden, vegitables, wheat bags, wheat store, wheet, whisky, beast, cattle, cow, herd, livestock, pig, sheep, steer, stock, animal, bulls, calf, calves, chicken, cows, donkey, goat, goats, head of cattle, head of cow, head of sheep, heifer, heifers, lamb, live stock, livestock, livestocks, masai herd, milk cow, ox, ox cart, oxen, ram, red poll cattle, shee, sheep or ox, steers, stocks, medical supplies, medicine, m&b tablets, medicines, supplies, bags, bedding, blankets, books, charcoal, cloth, clothing, cooking utensils, cutlery, equipment, farm implements, household items, instruments, iron, pails, petrol, provisions, oil, sacks, supplies, tarpaulin, thatch, timber, tobacco, tools, uniforms, wire, wireless set, whiskey, articles, bag, battery, bucket, ciga, cigarettes, clothes, clothing etc, cloths, dairy item, dairy record book, goods, material, oil+tins, provisionv, railway uniforms, supplies, tarpaulian, typewriter, v- drive belts, gunny bags, church, airstrip, bridges, half built village, roads, trenches, water tank, bridge, bridge broken, bridge damaged, infrastructure, milt property, miltproperty, prison camp, stn damaged, school, school, school building, school house, school property, schools, bg, embu guard, farm guard, forest guard, home guard, ikandine guard, kathanjure guard, kijabe guard, kikuyu guard, masai guard, meru guard, nandi guard, nkubu guard, stock guard, tigoni guard, tp and eg patrol, tp patrol, home guard patrol, m/g, m/g patrol, kathanjure hg, k g, ng, eg patrol, hg camp, hg leader, hg patrol, hg post, home, home guard, kg post, arab combat, arab combat unit, arab combat units, asian combat, asian combat team, second asian combat unit, asian combat units, kings african rifles, 3 kar, 4 kar, 5 kar, 6 kar, 7 kar, 23 kar, 26 kar, k.a.r, k.p.r, k.a.r., 5th k.a.r, 5kar, 5 k.a.r, 4th kar, british military, devonshire regiment, devons, field intelligence assistant, field intelligence officer, fio, gloucestershire regiment, glosters, lancashire fusiliers, king's shropshire light infantry, royal east kent regiment, royal fusiliers, royal highland regiment, black watch, royal inniskilling fusiliers, royal irish fusiliers, royal northumberland fusiliers, rnf, police and military, lancashire fusilliers, sp company 1 royal innisks, 1 rnf, rif, ksli, inniskillings, 1 glosters, 1 bw, 1 buffs, "a" company 1 royal innisks, "a" company, royal fusilers, of devons, of 1 glosters, lanc fus, fusiliers, fio kruger, fios, a co devon, 4 platoon support company, "c" company1 royal innisks, 6 platoonsp company 1 royal innisks, 1 lf, "c" company, "d" company, "a", "a" company bw, buffs ambush, d company, d' force, devens, c company, "d" force, army officer, british army officer, british military, buffs patrol, european officer, european soldiers, gloster patrol, captain folliott’s team, kenreg, kenregg, kenya regiment sergeant, keniya regiment, kenya regiment private, military (generic), captain, company, military property, platoon, security forces, security force, striking force, sentry, non commissioned officers, patrol, sentrie, sgt white, psuedo gangs, pseudo gang, pseudo team, trojan, psuedo gangs, trojan team, tracker group, pseudo teams, royal air force, raf, bombers, air strike, harvards, raf lincolns, flying squard, paramilitary, general service unit, cid, kenya police, kp constables' quarters, kpa, kenya police reserve, kpr officers, reserve police officer, rpo, rpos, police and k.p.r, police (generic), constable, polce, policy party, railway police, special branch, blue doctor team, special branch team, sb officers, githumu police, masai special constable, tribal police, tpeg, african constable, african costable, african special constable, tribal police, tribal police reserve, communities, manyatta, fishing camp, sublocation, village, camp, villages, detainees, prisoner, prisoners, suspected insurgents, bandits, food foragers, gangs, gang, kiama kia muingi, kkm, komerera, mau mau, oath administrator, passive wing, rebels, suspects, terrorists, terrorosts, gunman, terorist, gunmen, resistance group, resistance groups, oath administrater, oath administrators, passive wing members, resistance, suspect, suspected insurgents, terroist, terroists, terrost, africans, children, civilian, driver, employees, evangelist, family, farm boys, girls, informer, kikuyu, laborour, loyalist, masai, men, mission staff, owner, passengers, people, tugen tribesmen, stranger, sikh, herd boys, isiolo game scouts, farm labour, farmer, european, employer, employee, civilan, shopkeeper, students, teachers, turkana, vigilantes, women, workers, villagers, local labour, kikuyus, embu, tiriki houseboy, samburu, manager, woman, vetofficer, mrhiggins, masai party, kuria tribesmen, manager of akira estates, kuria tribesmen, chstephen, african, catholic misson staff, african staff, asian women, bus conductor, child, civilian(food carriers), civilian(schoolmaster), civilion, committee, committee member, courier, elder, embu tractor driver, employees of club, engine boy, girl, golf club staff, his own hut, hotel keeper, houseboy, illegal residents, indian, interpreter, kem, kikiyu, kikuyu assessor, kikuyu families, kikuyu houseboy, kikuyu labourer, kikyu, kirua village, labour line, labour lines, labourer, labourers, laboures, labourline, labours, males, man, maragoli, maragoli labourer, masai elders, masai tribesman, members of the thika committee, mna section leaders, municipal inspectors, non kikuyu employees, person, prostitutes, purke masai, pwd employee, railway employees, school master, school teacher, sisters committee, somali, staff, strangers, taxi drivers, teacher, treasurers, headman's son, norton traill's labour, gordon's labour, food carriersUnknown levels in `f`: ammunition, explosives, gelignite, firearms, arms, firearm, gun, pistol, rifle, ammunition, rifile, shotgun, verey pistol, other weapons, axe, scabbard, weapons, colonial authorities, councillor, district commissioner, district officer, forest ranger, game ranger, game warden, government, government employees, port authority, public works department, screening team, do, govrnment, wakamba screening team, do munuga, african do, dcmeru, colonial authorities, govtemployee, tribal authorities, chief, elders, headman, chief chostram, chief eliud, chief's sentry, private property, buildings, cattle dip, duka, farms, garage, homes, huts, hotel, land rover, lorry, market, office, oxcart, property, pump house, sawmill, shops, stores, tractor, vehicle, windmill, bullock's farm, cattle boma, coffe trees, coffee trees, cuthouse, dairy farm, dip, house, household, houses, hut, instrument, labour camp post, labour huts, lorries, lucerne sheds, maize shamba, milk factory, pig sty, private property, property of civilians, shop, store, thika fishing camp, vehicles, cash, funds, money, conductor's takings, banana, barley, bran, cabbage, coffee, corn, cream, crops, dairy, food, fruit, grain, honey, maize, meat, milk, oats, posho, potatoes, sugar, vegetable, wheat, food, food etc, food store, food stores, foodstuffs, fruits, grains, grains+cloth +money, green maize cobs, potato, potato store, potatos, skimmed milk, sugar cane, sugar maize, vegetables, vegitable garden, vegitables, wheat bags, wheat store, wheet, whisky, beast, cattle, cow, herd, livestock, pig, sheep, steer, stock, animal, bulls, calf, calves, chicken, cows, donkey, goat, goats, head of cattle, head of cow, head of sheep, heifer, heifers, lamb, live stock, livestock, livestocks, masai herd, milk cow, ox, ox cart, oxen, ram, red poll cattle, shee, sheep or ox, steers, stocks, medical supplies, medicine, m&b tablets, medicines, supplies, bags, bedding, blankets, books, charcoal, cloth, clothing, cooking utensils, cutlery, equipment, farm implements, household items, instruments, iron, pails, petrol, provisions, oil, sacks, supplies, tarpaulin, thatch, timber, tobacco, tools, uniforms, wire, wireless set, whiskey, articles, bag, battery, bucket, ciga, cigarettes, clothes, clothing etc, cloths, dairy item, dairy record book, goods, material, oil+tins, provisionv, railway uniforms, supplies, tarpaulian, typewriter, v- drive belts, gunny bags, church, airstrip, bridges, half built village, roads, trenches, water tank, bridge, bridge broken, bridge damaged, infrastructure, milt property, miltproperty, prison camp, stn damaged, school, school, school building, school house, school property, schools, bg, eg, embu guard, farm guard, forest guard, home guard, ikandine guard, kathanjure guard, kijabe guard, kikuyu guard, masai guard, meru guard, nandi guard, nkubu guard, stock guard, tigoni guard, tp and eg patrol, tp patrol, home guard patrol, m, m/g, m/g patrol, g, kathanjure hg, k g, ng, eg patrol, hg camp, hg leader, hg patrol, hg post, home, home guard, kg post, arab combat, arab combat unit, arab combat units, asian combat, asian combat unit, asian combat team, second asian combat unit, asian combat units, kings african rifles, 3 kar, 4 kar, 5 kar, 6 kar, 7 kar, 23 kar, 26 kar, k.a.r, k.p.r, k.a.r., 5th k.a.r, 5kar, 5 k.a.r, 4th kar, british military, devonshire regiment, devons, field intelligence assistant, field intelligence officer, fio, gloucestershire regiment, glosters, lancashire fusiliers, king's shropshire light infantry, royal east kent regiment, buffs, royal fusiliers, royal highland regiment, black watch, watch, royal inniskilling fusiliers, royal irish fusiliers, royal northumberland fusiliers, rnf, police and military, army, lancashire fusilliers, sp company 1 royal innisks, 1 rnf, rif, ksli, inniskillings, 1 glosters, 1 bw, 1 buffs, "a" company 1 royal innisks, "a" company, royal fusilers, of devons, of 1 glosters, lanc fus, fusiliers, fio kruger, fios, a co devon, 4 platoon support company, "c" company1 royal innisks, 6 platoonsp company 1 royal innisks, 1 lf, "c" company, "d" company, "a", "a" company bw, buffs ambush, d company, d' force, devens, c company, "d" force, army officer, british army officer, british military, buffs patrol, european officer, european soldiers, gloster patrol, kenya regiment, captain folliott’s team, kr, kenreg, kenregg, kenya regiment sergeant, kenya regt, keniya regiment, kenya regiment private, military (generic), captain, company, army, military property, platoon, security forces, security force, coy, striking force, sentry, non commissioned officers, patrol, sentrie, sgt white, psuedo gangs, pseudo gang, pseudo team, trojan, psuedo gangs, trojan team, tracker group, pseudo teams, royal air force, raf, bombers, air strike, harvards, raf lincolns, flying squard, paramilitary, general service unit, gsu, cid, kenya police, kp, kp constables' quarters, kpa, kenya police reserve, kpr officers, reserve police officer, rpo, rpos, police and k.p.r, police (generic), constable, polce, policy party, railway police, special branch, blue doctor team, special branch team, sb officers, githumu police, masai special constable, tribal police, tpeg, african constable, african costable, african special constable, tribal police, tribal police reserve, tpr, communities, manyatta, fishing camp, sublocation, village, camp, villages, detainees, prisoner, prisoners, suspected insurgents, bandits, food foragers, gangs, gang, kiama kia muingi, kkm, komerera, mau mau, oath administrator, passive wing, rebels, suspects, terrorists, terrorosts, terrorist, gunman, terorist, gunmen, resistance group, resistance groups, oath administrater, oath administrators, passive wing members, resistance, suspect, suspected insurgents, terroist, terroists, terrost, africans, children, civilian, driver, employees, evangelist, family, farm boys, girls, informer, laborour, loyalist, masai, men, mission staff, owner, passengers, people, tugen tribesmen, stranger, sikh, herd boys, isiolo game scouts, farm labour, farmer, european, employer, employee, civilan, shopkeeper, students, teachers, turkana, vigilantes, women, workers, villagers, labour, local labour, kikuyus, embu, tiriki houseboy, samburu, manager, woman, vetofficer, mrhiggins, masai party, kuria tribesmen, manager of akira estates, kuria tribesmen, chstephen, african, catholic misson staff, african staff, asian women, bus conductor, child, civilian(food carriers), civilian(schoolmaster), civilion, committee, committee member, courier, elder, embu tractor driver, employees of club, engine boy, girl, golf club staff, his own hut, hotel keeper, houseboy, illegal residents, indian, interpreter, kem, kikiyu, kikuyu assessor, kikuyu families, kikuyu houseboy, kikuyu labourer, kikyu, kirua village, labour line, labour lines, labourer, labourers, laboures, labourline, labours, males, man, maragoli, maragoli labourer, masai elders, masai tribesman, members of the thika committee, mna section leaders, municipal inspectors, non kikuyu employees, person, prostitutes, purke masai, pwd employee, railway employees, school master, school teacher, sisters committee, somali, staff, strangers, taxi drivers, teacher, treasurers, headman's son, norton traill's labour, gordon's labour, food carriers
lowlevelagg <- c(
  "church", "kenya police", "medicine", "tribal police reserve", "detainees", "kenya regiment", "other weapons",
  "paramilitary", "ammunition", "communities", "british military", "military (generic)", "tribal authorities", "kenya police reserve", "tribal police",
  "Kings African Rifles", "infrastructure", "school", "cash", "colonial authorities", "police (generic)", "supplies", "firearms", "food", "private property",
  "home guard", "civilians", "livestock", "suspected insurgents"
)
#events <- events %>%
#   mutate(target_clean_1_agglow=ifelse(target_clean_1_agglow  %in% lowlevelagg & !is.na(target_clean_1_agglow),target_clean_1_agglow, "uncategorized")) %>%
#   mutate(target_clean_2_agglow=ifelse(target_clean_2_agglow  %in% lowlevelagg & !is.na(target_clean_2_agglow),target_clean_2_agglow, "uncategorized")) %>% 
#   mutate(target_clean_3_agglow=ifelse(target_clean_3_agglow  %in% lowlevelagg & !is.na(target_clean_3_agglow),target_clean_3_agglow, "uncategorized"))
events$target_clean_1_agglow %>%
  janitor::tabyl(sort = TRUE) %>%
  janitor::adorn_crosstab(digits = 1)
'janitor::adorn_crosstab' is deprecated.
Use 'use the various adorn_ functions instead.  See the "tabyl" vignette for examples.' instead.
See help("Deprecated")
events[, c("target_clean_1_aggmed", "target_clean_2_aggmed", "target_clean_3_aggmed")] <-
  events[, c("target_clean_1_agglow", "target_clean_2_agglow", "target_clean_3_agglow")]
events <- events %>%
  mutate_at(
    vars(starts_with("initiator_clean_1_aggmed|initiator_clean_2_aggmed|initiator_clean_3_aggmed")),
    .funs = funs(forcats::fct_collapse(temp,
     'police'=c('cid','kenya police reserve','kenya police','police (generic)','railway police','special branch','tribal police','tribal police reserve') ,
     'military'=c('arab combat units','asian combat units','british military','Kings African Rifles','kenya regiment','military (generic)','psuedo gangs','royal air force'), 
     'civil authorities'=c('colonial authorities', 'tribal authorities'),
     'armaments'=c('ammunition','firearms','other weapons'),
     'provisions'=c('cash','food','livestock','medicine','supplies'),
     'public buildings'=c('church','school','infrastructure') 
     ))
  )
events$initiator_clean_1_aggmed %>%
  janitor::tabyl(sort = TRUE) %>%
  janitor::adorn_crosstab(digits = 1)
'janitor::adorn_crosstab' is deprecated.
Use 'use the various adorn_ functions instead.  See the "tabyl" vignette for examples.' instead.
See help("Deprecated")

events[, c("target_clean_1_agghigh", "target_clean_2_agghigh", "target_clean_3_agghigh")] <-
  events[, c("target_clean_1_aggmed", "target_clean_2_aggmed", "target_clean_3_aggmed")]
events <- events %>%
  mutate_at(
    vars(starts_with("target_clean_1_agghigh|target_clean_2_agghigh|target_clean_3_agghigh")),
    .funs = funs(forcats::fct_collapse(
                  'government'=c('civil authorities', 'home guard', 'military', 'police', 'paramilitary') ,
                  'rebels'=c('suspected insurgents','detainees') ,
                  'property'=c('armaments','private property','provisions','public buildings') ,
                  'civilians'=c('communities','communities')
          ))
  )

events$target_clean_1_agghigh %>%
  janitor::tabyl(sort = TRUE) %>%
  janitor::adorn_crosstab(digits = 1)

Count of Initiators and Targets

# These numbers are estimates and can be changed
# These columns are dirty and when an incoherent value is given, it is set to missing.
recode_counts <- function(indata){
  acouple <- "2"
  afew <- "3"
  agang <- "6"
  agang_large <- "12"
  
  indata %>% tolower() %>% dplyr::recode(
    "100+"= "100",
    "??"= "",
    "1 bag"= "1",
    "1 blanket"= "1",
    "1 burnt down"= "1",
    "1 civilian"= "1",
    "1 cow, 6 sheep"= "7",
    "1 cow"= "1",
    "1 goat, clothing"= "1",
    "1 goat"= "1",
    "1 looted"= "1",
    "1 looted"= "1",
    "1 ox"= "1",
    "1 sheep and chickens"= "1",
    "1 sheep, some chickens"= "1",
    "1 sheep"= "1",
    "1 shotgun ,30 rounds"= "31",
    "1 shotgun + 10rds"= "11",
    "1 steer"= "1",
    "1 village, 1 market"= "1",
    "1 wounded"= "1",
    "1 wrecked"= "1",
    "1+"= "1",
    "1+3"= "4",
    "1+some"= "1",
    "10 acres"= "10",
    "10 bags"= "10",
    "10 cattle"= "10",
    "10 sacks"= "10",
    "10 to 12"= "11",
    "10 to 15"= "13",
    "10/14/2013"= "",
    "10/15/2013"= "",
    "10/20/2013"= "",
    "100 lb"= "100",
    "100-130"= "115",
    "100-150"= "125",
    "100+"= "100",
    "10000"= "",
    "109 cattle"= "109",
    "10bags potatoes"= "10",
    "11 cattle"= "11",
    "11 sheep"= "11",
    "112 bore & 20.1.45 &7 rds"= "112",
    "12 bags"= "12",
    "12 cattle"= "12",
    "12 goats"= "12",
    "12 to 15"= "13",
    "12 to 20"= "17",
    "12/14/2013"= "",
    "120 cattle"= "120",
    "120+1"= "121",
    "13 sheep"= "13",
    "13-15"= "14",
    "1300 worth"= "1300",
    "14 cattle"= "14",
    "14 goats"= "14",
    "14 head"= "14",
    "14+"= "14",
    "15 - 20"= "18",
    "15 cattle"= "15",
    "15 to 20"= "17",
    "15 to 20"= "17",
    "15 to 25"= "20",
    "15-20"= "17",
    "15+"= "15",
    "150-200"= "175",
    "150+"= "150",
    "151 cattle"= "151",
    "17 cattle"= "17",
    "172 bags burnt"= "172",
    "18 cattle"= "18",
    "19 bags"= "19",
    "196 rounds"= "196",
    "2 bags maize"= "2",
    "2 bags"= "2",
    "2 bags"= "2",
    "2 buckets"= "2",
    "2 cattle hamstrung"= "2",
    "2 cattle, corn"= "3",
    "2 cattle"= "2",
    "2 cows"= "2",
    "2 debbies"= "2",
    "2 goats"= "2",
    "2 groups"= "2",
    "2 huts burnt"= "2",
    "2 sheep"= "2",
    "2 watches, cash"= "2",
    "2/3/2013"= "",
    "2+"= "2",
    "20 bags maize, 9 goats, 32 chickens and ducks, cash"= "60",
    "20 bags"= "20",
    "20 cattle"= "20",
    "20 goats"= "20",
    "20 sheep"= "20",
    "20 to 25"= "23",
    "20 to 30"= "25",
    "20 to 40"= "30",
    "20-25"= "23",
    "20-30"= "25",
    "20-35"= "30",
    "20-50"= "35",
    "20/30"= "25",
    "20/30"= "25",
    "20+"= "20",
    "200 yds"= "200",
    "200-300"= "250",
    "200+"= "200",
    "2000 acres"= "2000",
    "21 goats"= "21",
    "21 head"= "21",
    "22 cattle"= "22",
    "25 to 30"= "28",
    "25-30"= "27",
    "25-30"= "27",
    "28 killed"= "28",
    "28 sheep"= "28",
    "3 bags"= "3",
    "3 bags"= "3",
    "3 bikes"= "3",
    "3 cattle"= "3",
    "3 cattle"= "3",
    "3 goats"= "3",
    "3 or 4"= "3",
    "3 or 4"= "3",
    "3 pangas"= "3",
    "3 sheep, 2 calves"= "5",
    "3 sheep"= "3",
    "3 to 4"= "3",
    "3 to 4"= "3",
    "3/10/2013"= "",
    "3/4/2013"= "",
    "3/5/2013"= "",
    "3/6/2013"= "",
    "3+"= "3",
    "3+3+1+2"= "9",
    "3+some"= "3",
    "30 acres"= "30",
    "30 cattle"= "30",
    "30 to 40"= "35",
    "30-35"= "33",
    "30-40"= "35",
    "30-50"= "40",
    "30+"= "30",
    "300-400"= "350",
    "300+"= "300",
    "35 bags"= "35",
    "35 to 40"= "37",
    "38 cattle"= "38",
    "3or 4"= "3",
    "4 bags potatoes"= "4",
    "4 bags"= "4",
    "4 goats"= "4",
    "4 groups"= "",
    "4 or 5"= "4",
    "4 oxen"= "4",
    "4 sheep"= "4",
    "4 to 8"= "6",
    "4/6/2013"= "",
    "40 bag"= "40",
    "40 cattle"= "40",
    "40 sacks"= "40",
    "40 sheep"= "40",
    "40 to 50"= "45",
    "40/50"= "45",
    "40-50"= "45",
    "400 cattle"= "400",
    "4000"= "",
    "40161"="",
    "44 cattle"= "44",
    "5 bags"= "5",
    "5 calves"= "5",
    "5 cattle"= "5",
    "5 destroyed"= "5",
    "5 goats"= "5",
    "5 killed"= "5",
    "5 or 6"= "5",
    "5 sheep, 1 ox"= "6",
    "5 sheep"= "5",
    "5 to 6"= "5",
    "5/10/2013"= "",
    "5/6/2013"= "",
    "50 cattle"= "50",
    "50 to 60"= "55",
    "50-100"= "75",
    "50-60"= "55",
    "50-75"= "62",
    "50+"= "50",
    "50+"= "50",
    "5000 acres"= "5000",
    "519 +"= "519",
    "53 detained"= "53",
    "54 sheep and goats"= "54",
    "56 committee members"= "56",
    "6 bag"= "6",
    "6 bags"= "6",
    "6 cattle"= "6",
    "6 cattle"= "6",
    "6 goats"= "6",
    "6 or 7"= "6",
    "6 sheep and goats"= "6",
    "6 sheep"= "6",
    "6 to 7"= "6",
    "6 to 8"= "7",
    "6 to 9"= "8",
    "6-8 man"= "7",
    "6/10/2013"= "",
    "6/8/2013"= "",
    "60-100"= "80",
    "60-70"= "65",
    "64 cattle"= "64",
    "7 bags"= "7",
    "7 cattle"= "7",
    "7 sheep"= "7",
    "7/10/2013"= "",
    "70 bags"= "70",
    "70 cattle, sheep"= "70",
    "70-100"= "85",
    "70000"= "",
    "75 rounds"= "75",
    "8 bags potatoes"= "8",
    "8 cattle"= "8",
    "8 cows slashed"= "8",
    "8 cows"= "8",
    "8 sheep"= "8",
    "8 to 10"= "9",
    "8/10/2013"= "",
    "80 cattle"= "80",
    "80-100"= "90",
    "84 sheep, 1 cow, 5 chickens"= "90",
    "9 cattle"= "9",
    "9 sheep"= "9",
    "9 to 10"= "9",
    "9+9"= "18",
    "900(not clear)"= "900",
    "all locals"= "",
    "all"= "",
    "app 5"= "5",
    "app. 100"= "100",
    "app. 120"= "120",
    "armed gang"= agang,
    "band"= agang,
    "bands"= "",
    "cattle slashing"= "",
    "clothing"= "",
    "considerable quantity"= "",
    "fairly large gang"= agang_large,
    "few bags"= "",
    "few"= "",
    "food"= "",
    "gang"= agang,
    "gangs"= agang_large,
    "guards"= afew,
    "half village"= "",
    "labour"= "",
    "large crowd"= "",
    "large force"= agang_large,
    "large gang"= agang_large,
    "large meeting"= "",
    "large number"= "",
    "large numbers"= "",
    "large quantities"= "",
    "large quantity"= "",
    "large re-oathing ceremony"= "",
    "large scale"= "",
    "large"= agang_large,
    "largish gang"= agang_large,
    "local populace"= "",
    "many thousand"= "2000",
    "mob"= "",
    "not given"= "",
    "number"= "",
    "occupants"= "",
    "over 200"= "200",
    "Party"= agang,
    "party"= agang,
    "patrol"= agang,
    "posho"= "",
    "potatoes"= "",
    "quantity of clothing"= "",
    "section"= "",
    "several gangs"= agang_large,
    "several"= "3",
    "sheep and goats"= "",
    "shs 2,300/-"= "2300",
    "shs 60/-"= "60",
    "shs. 1,000"= "1000",
    "shs. 18"= "18",
    "shs. 30"= "30",
    "small gang"= agang,
    "small gangs"= agang,
    "small group"= agang,
    "small party"= afew,
    "small"= agang,
    "some"= afew,
    "sufficient food"= "",
    "unknown"= "",
    "very large gang"= agang_large,
    "villages in ndia, gichugu, embu divisions"= "",
    "wives"= ""
  ) %>% as.numeric() %>% return()
  
}
events$initiator_numbers_numeric <- events$initiator_numbers %>% recode_counts()
NAs introduced by coercion
events$target_numbers_numeric <- events$target_numbers %>% recode_counts()
NAs introduced by coercion
events$affected_count_numeric <- events$affected_count %>% recode_counts()
NAs introduced by coercion

Casualties

events[, c(
  "government_killed_clean", "government_wounded_clean", "government_captured_clean",
  "rebels_killed_clean", "rebels_wounded_clean", "rebels_captured_clean",
  "civilians_killed_clean", "civilians_wounded_clean", "civilians_captured_clean"
)] <-
  events[, c(
    "government_killed", "government_wounded", "government_captured",
    "rebels_killed", "rebels_wounded", "rebels_captured",
    "civilians_killed", "civilians_wounded", "civilians_captured"
  )]
events <- events %>% mutate_at(
  .vars = c(
    "government_killed_clean", "government_wounded_clean", "government_captured_clean",
    "rebels_killed_clean", "rebels_wounded_clean", "rebels_captured_clean",
    "civilians_killed_clean", "civilians_wounded_clean", "civilians_captured_clean"
  ),
  funs(as.numeric(forcats::fct_collapse(.,
                                  '1'=c('unKnown','unknown','UnKnown','UNKNOWN','Unkown','Unknown','Number','More','101','146','122','208','94'),
                                  '2'=c('Few','others','Few','some'),
                                  '3'=c('Many','Sevaral','several','Several More','Several others','Some','Council of elders','Council of war','Several','Majority','many','Gang','Several','Small gang','3+'),
                                  '100'='100+',
                                  '23'='23 Families',
                                  '28'='28 families',
                                  '35'='30-40',
                                  '50'='50+',
                                  '45'='4500', 
                                  '80'='800', 
                                  '6'='6+' , 
                                  '10'='10+' ,
                                  '10197'=NA,
                                  '7'='48') %>%
                    fct_explicit_na(na_level='0')
                  )
       )
)

Unknown levels in f: unKnown, unknown, UnKnown, UNKNOWN, Unkown, Unknown, Number, More, 146, 122, 208, 94, Few, others, Few, Many, Sevaral, several, Several More, Several others, Council of elders, Council of war, Several, Majority, many, Gang, Several, Small gang, 3+, 100+, 23 Families, 28 families, 30-40, 50+, 4500, 800, 6+, 10+, NA, 48Unknown levels in f: unKnown, unknown, UnKnown, UNKNOWN, Unkown, Unknown, Number, More, 101, 146, 122, 208, 94, Few, others, Few, some, Many, Sevaral, several, Several More, Several others, Some, Council of elders, Council of war, Several, Majority, many, Gang, Several, Small gang, 3+, 100+, 23 Families, 28 families, 30-40, 50+, 4500, 800, 6+, 10+, NA, 48Unknown levels in f: unKnown, unknown, UnKnown, UNKNOWN, Unkown, Unknown, Number, More, 101, 146, 122, 208, 94, Few, others, Few, some, Many, Sevaral, several, Several More, Several others, Some, Council of elders, Council of war, Several, Majority, many, Gang, Several, Small gang, 3+, 100+, 23 Families, 28 families, 30-40, 50+, 4500, 800, 6+, 10+, NA, 48Unknown levels in f: unKnown, unknown, UnKnown, UNKNOWN, Number, More, 101, 146, 122, 208, 94, Few, others, Few, some, Sevaral, several, Several More, Several others, Council of elders, Council of war, Majority, many, Gang, Small gang, 3+, 100+, 23 Families, 28 families, 30-40, 50+, 4500, 800, 6+, 10+, NAUnknown levels in f: unKnown, unknown, UnKnown, UNKNOWN, Unkown, Unknown, Number, 101, 146, 122, 208, some, Council of elders, Council of war, Majority, many, Gang, Small gang, 3+, 100+, 23 Families, 28 families, 30-40, 50+, 4500, 800, 10+, NAUnknown levels in f: More, 101, 146, 122, 208, 94, others, Many, Sevaral, Several More, Several others, 6+, NAUnknown levels in f: unKnown, unknown, UnKnown, UNKNOWN, Unkown, Unknown, Number, More, 101, 146, 122, 208, 94, Few, others, Few, some, Many, Sevaral, several, Several More, Several others, Council of elders, Council of war, Several, Majority, many, Gang, Several, Small gang, 3+, 100+, 23 Families, 28 families, 30-40, 50+, 4500, 800, 6+, 10+, NA, 48Unknown levels in f: unKnown, unknown, UnKnown, UNKNOWN, Unkown, Unknown, Number, More, 101, 146, 122, 208, 94, Few, others, Few, some, Many, Sevaral, several, Several More, Several others, Some, Council of elders, Council of war, Several, Majority, many, Gang, Several, Small gang, 3+, 100+, 23 Families, 28 families, 30-40, 50+, 4500, 800, 6+, 10+, NA, 48Unknown levels in f: unKnown, unknown, UnKnown, UNKNOWN, Unkown, Number, More, 101, 146, 122, 208, 94, Few, others, Few, some, Sevaral, Several More, Several others, Some, Council of elders, Council of war, Majority, many, Gang, Small gang, 3+, 100+, 23 Families, 28 families, 30-40, 50+, 4500, 800, 6+, 10+, NA, 48

events <- events %>% mutate_at(.vars = c(
  "government_killed_clean", "government_wounded_clean", "government_captured_clean",
  "rebels_killed_clean", "rebels_wounded_clean", "rebels_captured_clean",
  "civilians_killed_clean", "civilians_wounded_clean", "civilians_captured_clean"
), funs(as.numeric))
events <- events %>%
  mutate(rebels_killedwounded_clean = rebels_killed_clean + rebels_wounded_clean) %>%
  mutate(government_killed_wounded_clean = government_killed_clean + government_wounded_clean) %>%
  mutate(rebels_government_killedwounded_clean = rebels_killed_clean + rebels_wounded_clean) %>%
  mutate(rebels_government_killed_clean = rebels_killed_clean + government_killed_clean) %>%
  mutate(rebels_government_civilians_killed_clean = rebels_killed_clean + government_killed_clean + civilians_killed_clean)
events %>% janitor::crosstab(initiator_clean_1_agghigh, type_clean_agghigh) %>% janitor::adorn_crosstab(digits = 1)
'janitor::crosstab' is deprecated.
Use 'tabyl(dat, var1, var2, ...)' instead.
See help("Deprecated")Factor `type_clean_agghigh` contains implicit NA, consider using `forcats::fct_explicit_na`'janitor::adorn_crosstab' is deprecated.
Use 'use the various adorn_ functions instead.  See the "tabyl" vignette for examples.' instead.
See help("Deprecated")
events %>% janitor::crosstab(target_clean_1_agghigh, type_clean_agghigh) %>% janitor::adorn_crosstab(digits = 1)
'janitor::crosstab' is deprecated.
Use 'tabyl(dat, var1, var2, ...)' instead.
See help("Deprecated")Factor `type_clean_agghigh` contains implicit NA, consider using `forcats::fct_explicit_na`'janitor::adorn_crosstab' is deprecated.
Use 'use the various adorn_ functions instead.  See the "tabyl" vignette for examples.' instead.
See help("Deprecated")
events %>% janitor::crosstab(target_clean_1_agghigh, initiator_clean_1_agghigh) %>% janitor::adorn_crosstab(digits = 1)
'janitor::crosstab' is deprecated.
Use 'tabyl(dat, var1, var2, ...)' instead.
See help("Deprecated")'janitor::adorn_crosstab' is deprecated.
Use 'use the various adorn_ functions instead.  See the "tabyl" vignette for examples.' instead.
See help("Deprecated")

Clean Map Coordinates (East Africa Grid System)

cat("\014")

#Cases to handle
#"928141"
#"311449  328445    338443"
#"EASTING 30 and 27"
#"EastLeigh Sect.7"
#"FARM 535/4"
#"HAC  0202"
#"HAC.577236"
#"HZN 974641 & HZN 974651"
#"HZJ. 8595"
#"HZJ. 465765, HZJ. 459771, HZJ. 451756"
#"HZJ 42765, HZJ 42375and HZJ 429761"
#"HZH 960610, HZH 960630, HZH 977538"
#"H.Z.R. 4786"
#"HAD 1708, HAD 1709"
#"HAD 3326/3327"
#"HZJ 42765, HZJ 42375and HZJ 429761"
#"HZJ 9518  9617"
#"HZP 7430, HZP 9029, HZP 6448, HZP 7252, HZP 9448"
events$map_coordinate %>% janitor::tabyl() 
                        .  n      percent valid_percent
                       ??  1 9.552011e-05  0.0001717918
                     1182  1 9.552011e-05  0.0001717918
                     1200  1 9.552011e-05  0.0001717918
                    12053  1 9.552011e-05  0.0001717918
                    12402  1 9.552011e-05  0.0001717918
                     1280  1 9.552011e-05  0.0001717918
                     1281  1 9.552011e-05  0.0001717918
                     1663  1 9.552011e-05  0.0001717918
                   184157  1 9.552011e-05  0.0001717918
                     1870  2 1.910402e-04  0.0003435836
                     1914  1 9.552011e-05  0.0001717918
                     2113  1 9.552011e-05  0.0001717918
                     2122  1 9.552011e-05  0.0001717918
                   214123  1 9.552011e-05  0.0001717918
                   223348  1 9.552011e-05  0.0001717918
                     2287  1 9.552011e-05  0.0001717918
                     2314  1 9.552011e-05  0.0001717918
                     2396  1 9.552011e-05  0.0001717918
                     2405  1 9.552011e-05  0.0001717918
                     2407  1 9.552011e-05  0.0001717918
                     2410  1 9.552011e-05  0.0001717918
                     2555  1 9.552011e-05  0.0001717918
                     2575  1 9.552011e-05  0.0001717918
                     2608  3 2.865603e-04  0.0005153754
                     2619  1 9.552011e-05  0.0001717918
                   270527  1 9.552011e-05  0.0001717918
                     2722  1 9.552011e-05  0.0001717918
                     2727  1 9.552011e-05  0.0001717918
                     2753  1 9.552011e-05  0.0001717918
                     2832  1 9.552011e-05  0.0001717918
                     2849  1 9.552011e-05  0.0001717918
                     2854  1 9.552011e-05  0.0001717918
                     2862  1 9.552011e-05  0.0001717918
                     2863  1 9.552011e-05  0.0001717918
                   287428  1 9.552011e-05  0.0001717918
                     2904  1 9.552011e-05  0.0001717918
                     2923  1 9.552011e-05  0.0001717918
                     2924  1 9.552011e-05  0.0001717918
                     2932  1 9.552011e-05  0.0001717918
                    29774  1 9.552011e-05  0.0001717918
                     3021  1 9.552011e-05  0.0001717918
                     3025  2 1.910402e-04  0.0003435836
   3045  3545  3540  3141  1 9.552011e-05  0.0001717918
                   310189  1 9.552011e-05  0.0001717918
 311449  328445    338443  1 9.552011e-05  0.0001717918
                   315505  1 9.552011e-05  0.0001717918
                     3210  1 9.552011e-05  0.0001717918
                     3257  1 9.552011e-05  0.0001717918
                     3342  1 9.552011e-05  0.0001717918
                     3344  1 9.552011e-05  0.0001717918
                     3351  1 9.552011e-05  0.0001717918
                     3357  2 1.910402e-04  0.0003435836
                   341088  1 9.552011e-05  0.0001717918
                     3555  1 9.552011e-05  0.0001717918
                    36334  1 9.552011e-05  0.0001717918
                     3705  1 9.552011e-05  0.0001717918
                     3709  1 9.552011e-05  0.0001717918
                     3727  1 9.552011e-05  0.0001717918
                     3730  1 9.552011e-05  0.0001717918
                     3743  1 9.552011e-05  0.0001717918
                   375197  1 9.552011e-05  0.0001717918
                     3761  2 1.910402e-04  0.0003435836
                     3833  2 1.910402e-04  0.0003435836
                     3849  1 9.552011e-05  0.0001717918
                     3854  1 9.552011e-05  0.0001717918
                     4021  1 9.552011e-05  0.0001717918
                     4030  1 9.552011e-05  0.0001717918
                     4052  1 9.552011e-05  0.0001717918
                     4126  1 9.552011e-05  0.0001717918
                     4152  3 2.865603e-04  0.0005153754
                     4467  1 9.552011e-05  0.0001717918
                     4646  1 9.552011e-05  0.0001717918
               4872  5271  1 9.552011e-05  0.0001717918
                     5030  1 9.552011e-05  0.0001717918
                   506413  1 9.552011e-05  0.0001717918
                   511558  1 9.552011e-05  0.0001717918
                     5152  1 9.552011e-05  0.0001717918
                     5155  1 9.552011e-05  0.0001717918
                     5246  1 9.552011e-05  0.0001717918
                     5249  1 9.552011e-05  0.0001717918
                     5317  1 9.552011e-05  0.0001717918
                     5448  1 9.552011e-05  0.0001717918
                   552197  1 9.552011e-05  0.0001717918
                     5627  1 9.552011e-05  0.0001717918
                      563  1 9.552011e-05  0.0001717918
                     5720  1 9.552011e-05  0.0001717918
                   587361  1 9.552011e-05  0.0001717918
                   600570  1 9.552011e-05  0.0001717918
                     6020  1 9.552011e-05  0.0001717918
                     6050  2 1.910402e-04  0.0003435836
                   610340  1 9.552011e-05  0.0001717918
                     6154  2 1.910402e-04  0.0003435836
                     6225  2 1.910402e-04  0.0003435836
                    63343  1 9.552011e-05  0.0001717918
                     6424  1 9.552011e-05  0.0001717918
                   644526  1 9.552011e-05  0.0001717918
                     6452  2 1.910402e-04  0.0003435836
                     6638  1 9.552011e-05  0.0001717918
                     6640  1 9.552011e-05  0.0001717918
                     6654  1 9.552011e-05  0.0001717918
                     6740  1 9.552011e-05  0.0001717918
                      677  1 9.552011e-05  0.0001717918
                     6825  1 9.552011e-05  0.0001717918
                     6826  1 9.552011e-05  0.0001717918
                     7039  1 9.552011e-05  0.0001717918
                     7050  1 9.552011e-05  0.0001717918
                     7196  1 9.552011e-05  0.0001717918
                     7247  2 1.910402e-04  0.0003435836
                     7248  1 9.552011e-05  0.0001717918
                     7397  1 9.552011e-05  0.0001717918
                     7552  1 9.552011e-05  0.0001717918
                      757  1 9.552011e-05  0.0001717918
                     7570  1 9.552011e-05  0.0001717918
                     7754  2 1.910402e-04  0.0003435836
                     8134  1 9.552011e-05  0.0001717918
                     8153  1 9.552011e-05  0.0001717918
                     8345  1 9.552011e-05  0.0001717918
                     8351  1 9.552011e-05  0.0001717918
                    85154  1 9.552011e-05  0.0001717918
                     8516  1 9.552011e-05  0.0001717918
                     8556  1 9.552011e-05  0.0001717918
                   878560  1 9.552011e-05  0.0001717918
                     9245  2 1.910402e-04  0.0003435836
                   928141  1 9.552011e-05  0.0001717918
                     9649  1 9.552011e-05  0.0001717918
        EASTING 30 and 27  1 9.552011e-05  0.0001717918
         EastLeigh Sect.7  1 9.552011e-05  0.0001717918
               FARM 535/4  1 9.552011e-05  0.0001717918
                 GZM 6590  1 9.552011e-05  0.0001717918
                 H?? 4397  1 9.552011e-05  0.0001717918
               H.D.597254  1 9.552011e-05  0.0001717918
              H.Z.R. 4786  2 1.910402e-04  0.0003435836
                 HAA 8106  1 9.552011e-05  0.0001717918
                HAB  7310  2 1.910402e-04  0.0003435836
                 HAB 0535  1 9.552011e-05  0.0001717918
                 HAB 0578  1 9.552011e-05  0.0001717918
                 HAB 1316  2 1.910402e-04  0.0003435836
                 HAB 2106  1 9.552011e-05  0.0001717918
                 HAB 2244  1 9.552011e-05  0.0001717918
                 HAB 5640  1 9.552011e-05  0.0001717918
                 HAB 5806  4 3.820804e-04  0.0006871672
                 HAB 5830  1 9.552011e-05  0.0001717918
                 HAB 6232  2 1.910402e-04  0.0003435836
                 HAB 6234  1 9.552011e-05  0.0001717918
                 HAB 6305  1 9.552011e-05  0.0001717918
                 HAB 6401  1 9.552011e-05  0.0001717918
                 HAB 6403  1 9.552011e-05  0.0001717918
                 HAB 6405  1 9.552011e-05  0.0001717918
                 HAB 6505  5 4.776005e-04  0.0008589589
                 HAB 6507  1 9.552011e-05  0.0001717918
                 HAB 6609  2 1.910402e-04  0.0003435836
                 HAB 6630  1 9.552011e-05  0.0001717918
                 HAB 6639  2 1.910402e-04  0.0003435836
               HAB 670100  1 9.552011e-05  0.0001717918
                 HAB 6707  3 2.865603e-04  0.0005153754
                 HAB 6709  1 9.552011e-05  0.0001717918
                 HAB 6710  1 9.552011e-05  0.0001717918
                 HAB 6734  3 2.865603e-04  0.0005153754
                 HAB 6788  1 9.552011e-05  0.0001717918
                 HAB 6804  2 1.910402e-04  0.0003435836
                 HAB 6807  2 1.910402e-04  0.0003435836
                 HAB 6811  1 9.552011e-05  0.0001717918
                 HAB 6818  3 2.865603e-04  0.0005153754
                 HAB 6829  1 9.552011e-05  0.0001717918
                 HAB 6841  2 1.910402e-04  0.0003435836
                 HAB 6904  2 1.910402e-04  0.0003435836
                 HAB 6908  1 9.552011e-05  0.0001717918
                 HAB 6913  1 9.552011e-05  0.0001717918
                 HAB 6921  4 3.820804e-04  0.0006871672
                 HAB 6941  1 9.552011e-05  0.0001717918
                 HAB 7001  1 9.552011e-05  0.0001717918
                 HAB 7002  1 9.552011e-05  0.0001717918
                 HAB 7013  1 9.552011e-05  0.0001717918
                 HAB 7016  1 9.552011e-05  0.0001717918
                 HAB 7024  1 9.552011e-05  0.0001717918
                 HAB 7040  1 9.552011e-05  0.0001717918
                 HAB 7045  1 9.552011e-05  0.0001717918
                 HAB 7102  1 9.552011e-05  0.0001717918
                 HAB 7107  1 9.552011e-05  0.0001717918
                 HAB 7135  1 9.552011e-05  0.0001717918
                 HAB 7145  1 9.552011e-05  0.0001717918
                 HAB 7200  2 1.910402e-04  0.0003435836
                 HAB 7208  1 9.552011e-05  0.0001717918
                 HAB 7211  1 9.552011e-05  0.0001717918
                 HAB 7214  2 1.910402e-04  0.0003435836
                 HAB 7216  1 9.552011e-05  0.0001717918
                 HAB 7231 11 1.050721e-03  0.0018897097
                 HAB 7302  1 9.552011e-05  0.0001717918
                 HAB 7305  1 9.552011e-05  0.0001717918
                 HAB 7308  1 9.552011e-05  0.0001717918
                 HAB 7315  2 1.910402e-04  0.0003435836
                 HAB 7316  1 9.552011e-05  0.0001717918
                 HAB 7405  1 9.552011e-05  0.0001717918
                 HAB 7413  2 1.910402e-04  0.0003435836
                 HAB 7419  1 9.552011e-05  0.0001717918
                 HAB 7501  3 2.865603e-04  0.0005153754
                 HAB 7511  1 9.552011e-05  0.0001717918
                 HAB 7519  1 9.552011e-05  0.0001717918
                 HAB 7526  1 9.552011e-05  0.0001717918
               HAB 753068  1 9.552011e-05  0.0001717918
                 HAB 7605  1 9.552011e-05  0.0001717918
                 HAB 7611  1 9.552011e-05  0.0001717918
                 HAB 7624  4 3.820804e-04  0.0006871672
                 HAB 7630  1 9.552011e-05  0.0001717918
                 HAB 7655  1 9.552011e-05  0.0001717918
                 HAB 7702  1 9.552011e-05  0.0001717918
                 HAB 7712  1 9.552011e-05  0.0001717918
                 HAB 7801  4 3.820804e-04  0.0006871672
                 HAB 7803  1 9.552011e-05  0.0001717918
                 HAB 7807  1 9.552011e-05  0.0001717918
                 HAB 7810  1 9.552011e-05  0.0001717918
                 HAB 7813  1 9.552011e-05  0.0001717918
                 HAB 7815  5 4.776005e-04  0.0008589589
                 HAB 7852  2 1.910402e-04  0.0003435836
                 HAB 7912  1 9.552011e-05  0.0001717918
                 HAB 7933  3 2.865603e-04  0.0005153754
                 HAB 8000  1 9.552011e-05  0.0001717918
                 HAB 8002  1 9.552011e-05  0.0001717918
                 HAB 8003  1 9.552011e-05  0.0001717918
                 HAB 8010  9 8.596810e-04  0.0015461261
                 HAB 8017  1 9.552011e-05  0.0001717918
                 HAB 8027  1 9.552011e-05  0.0001717918
                 HAB 8125 10 9.552011e-04  0.0017179179
                 HAB 8132  1 9.552011e-05  0.0001717918
                 HAB 8200  1 9.552011e-05  0.0001717918
                 HAB 8226  1 9.552011e-05  0.0001717918
                 HAB 8300  4 3.820804e-04  0.0006871672
                 HAB 8303  1 9.552011e-05  0.0001717918
                 HAB 8321  1 9.552011e-05  0.0001717918
                 HAB 8409  3 2.865603e-04  0.0005153754
                 HAB 8423  1 9.552011e-05  0.0001717918
                 HAB 8509  1 9.552011e-05  0.0001717918
                 HAB 8524  1 9.552011e-05  0.0001717918
                 HAB 8604  1 9.552011e-05  0.0001717918
                 HAB 8605  4 3.820804e-04  0.0006871672
                 HAB 8606  3 2.865603e-04  0.0005153754
                 HAB 8710  3 2.865603e-04  0.0005153754
                 HAB 8714  2 1.910402e-04  0.0003435836
                 HAB 8803  1 9.552011e-05  0.0001717918
                 HAB 8906  3 2.865603e-04  0.0005153754
               HAB 895250  1 9.552011e-05  0.0001717918
                 HAB 9002  1 9.552011e-05  0.0001717918
                 HAB 9102  1 9.552011e-05  0.0001717918
                 HAB 9103  1 9.552011e-05  0.0001717918
                 HAB 9117  1 9.552011e-05  0.0001717918
                 HAB 9127  2 1.910402e-04  0.0003435836
                 HAB 9129  1 9.552011e-05  0.0001717918
                 HAB 9204  1 9.552011e-05  0.0001717918
                 HAB 9227  2 1.910402e-04  0.0003435836
                 HAB 9229  1 9.552011e-05  0.0001717918
 [ reached 'max' / getOption("max.print") -- omitted 3774 rows ]
events$map_coordinate_clean <- events$map_coordinate %>% stringr::str_replace_all("[[:punct:]]| ", "") 
(events$map_coordinate_clean_length <- events$map_coordinate_clean %>% nchar() ) %>% janitor::tabyl() %>% round(3)
  .    n percent valid_percent
  0    1   0.000         0.000
  3   10   0.001         0.002
  4  110   0.011         0.019
  5   11   0.001         0.002
  6   29   0.003         0.005
  7 3155   0.301         0.542
  8   38   0.004         0.007
  9 2414   0.231         0.415
 10    6   0.001         0.001
 11   10   0.001         0.002
 12    1   0.000         0.000
 13    2   0.000         0.000
 14   12   0.001         0.002
 15    1   0.000         0.000
 16    3   0.000         0.001
 18    7   0.001         0.001
 19    3   0.000         0.001
 20    1   0.000         0.000
 27    5   0.000         0.001
 28    1   0.000         0.000
 35    1   0.000         0.000
 NA 4648   0.444            NA
(events$map_coordinate_clean_text <- events$map_coordinate_clean %>% gsub("[0-9]", "\\1",.)) %>% janitor::tabyl()  %>% mutate_if(is.numeric, round,2) #Split into a text component and numeric component
(events$map_coordinate_clean_number <- events$map_coordinate_clean %>% gsub("[A-Za-z]", "\\1", .) ) %>% janitor::tabyl()  %>% mutate_if(is.numeric, round,2)
(events$map_coordinate_clean_text_band <- events$map_coordinate_clean_text %>% substring(1,1) ) %>% janitor::tabyl()  %>% mutate_if(is.numeric, round,2)
(events$map_coordinate_clean_text_block <- events$map_coordinate_clean_text %>% substring(2,2) ) %>% janitor::tabyl()  %>% mutate_if(is.numeric, round,2)
(events$map_coordinate_clean_text_subblock <-  events$map_coordinate_clean_text %>%  substring(3,3) ) %>% janitor::tabyl()  %>% mutate_if(is.numeric, round,2)
(events$map_coordinate_clean_number_length <- events$map_coordinate_clean_number %>% nchar() ) %>% janitor::tabyl()  %>% mutate_if(is.numeric, round,2)
(events$map_coordinate_clean_number_easting <- events$map_coordinate_clean_number %>%
                                              substring(1, events$map_coordinate_clean_number_length/2) %>% as.numeric() ) %>%
                                              janitor::tabyl()  %>% mutate_if(is.numeric, round,2)
(events$map_coordinate_clean_number_northing <- events$map_coordinate_clean_number %>%
                                              substring(events$map_coordinate_clean_number_length/2+1, events$map_coordinate_clean_number_length) %>%
                                              as.numeric() )  %>%
                                              janitor::tabyl()  %>% mutate_if(is.numeric, round,2)

Convert Coordinates to lat long

#
cat("\014")
print("Starting Converting Coordinates, may take some time")
for(i in 1:nrow(events)){
  # print(i)
  #The “exposition” pipe operator, %$% exposes the names within the left-hand side object to the right-hand side expression. Essentially, it is a short-hand for using the with functions 
  suppressMessages({
    temp <- with(events[i,] ,
                 MeasuringLandscape:::EAGS2LatLong(band=map_coordinate_clean_text_band,
                                        block=map_coordinate_clean_text_block,
                                        subblock=map_coordinate_clean_text_subblock,
                                        easting=map_coordinate_clean_number_easting , 
                                        northing=map_coordinate_clean_number_northing)
    )
    events$map_coordinate_clean_latitude[i] <- temp$latitude
    events$map_coordinate_clean_longitude[i] <- temp$longitude
  })
  #print(is.na(events$map_coordinate_clean_latitude[i]))
}
print("Finished Converting Coordinates")
#(temp <- events %>% mutate(map_coordinate_clean_row=1:n()) %>% filter(is.na(map_coordinate_clean_latitude) & !is.na(map_coordinate_clean)) %>% select(starts_with("map_coordinate_clean")) ) %>% distinct() %>% print(n=40) #visualize errors
#dim(temp) #195 coordinates don't convert.
testing=F
if(testing){
  i=3684
  events[i,] %>%  select(starts_with("map_coordinate_clean")) %$% EAGS2LatLong(band=map_coordinate_clean_text_band,
                                                                              block=map_coordinate_clean_text_block,
                                                                              subblock=map_coordinate_clean_text_subblock,
                                                                              easting=map_coordinate_clean_number_easting , 
                                                                              northing=map_coordinate_clean_number_northing)
  
  with(events[i,], map_coordinate_clean)
  with(events[i,], map_coordinate)
  band <- with(events[i,], map_coordinate_clean_text_band)
  block <- with(events[i,],map_coordinate_clean_text_block)
  subblock <- with(events[i,],map_coordinate_clean_text_subblock) #
  easting <- with(events[i,],map_coordinate_clean_number_easting)
  northing <- with(events[i,],map_coordinate_clean_number_northing)
}
stats::quantile(events$map_coordinate_clean_latitude, probs =c(.005,.01,.1,.5,.9,.99,.995), na.rm=T, type=9) 
      0.5%         1%        10%        50%        90%        99%      99.5% 
-1.7190692 -1.5442831 -1.0403576 -0.4886414  0.0452442  0.7058103  2.2342679 
stats::quantile(events$map_coordinate_clean_longitude, probs =c(.005,.01,.1,.5,.9,.99,.995), na.rm=T, type=9)
    0.5%       1%      10%      50%      90%      99%    99.5% 
35.67124 35.78910 36.36309 36.93511 37.45506 38.26145 39.87161 
#plot(events$map_coordinate_clean_longitude,events$map_coordinate_clean_latitude) #plot with the outliers
#This is just to remove absolutely clear outliers. Not to set the region of interest.
#Outlier Bounding Box:
#NE 4.62933, 41.899059
#SW -4.71712, 33.90884
events$map_coordinate_clean_latitude[events$map_coordinate_clean_latitude < -4.71712 |
                                      events$map_coordinate_clean_latitude>4.62933] <- NA
events$map_coordinate_clean_longitude[events$map_coordinate_clean_longitude < 33.90884 |
                                      events$map_coordinate_clean_longitude>41.899059] <- NA
plot(events$map_coordinate_clean_longitude,events$map_coordinate_clean_latitude)

District of Document

cat("\014")

#clean document district
events$document_district_clean <- events$document_district %>% stringi::stri_trans_totitle() %>% stringr::str_trim() %>% as.factor()
events$document_district_clean <- events$document_district_clean %>% forcats::fct_collapse( 
                                   'Embu'=c('Embu-Fort Hall Border'),
                                   'Baringo'=c('BARINGO'),
                                   'FORT HALL'=c('Fort Hall'),
                                   'Naivasha'=c('Naviasha'),
                                   'Nyeri' = c('Nyeri Settled Area'),
                                   'Nyeri' = c('South Nyeri Reserve') ,
                                   'Nairobi' = c('Jock Scott'),
                                   NULL=c('',' ' ,'Document District','Kitui','Matathia', 'H/M','Reference Serial')
)

Unknown levels in f: BARINGO

events$document_unit_type <- NA
condition <- events$document_district_clean %in% c("Rift Valley","Central Province"); table(condition)

condition FALSE TRUE 9278 1191

events$document_unit_type[condition] <- "Province"
#Jock Scott Nairobi City
condition <- events$document_district_clean %in% c("Nairobi"); table(condition)

condition FALSE TRUE 10075 394

events$document_unit_type[condition] <- "City"
condition <- events$document_district %in% c("JOCK SCOTT"); table(condition)

condition FALSE TRUE 10357 112

events$document_unit_type[condition] <- "Operation Jock Scott"
#Missing? Elgeyo/Marakwet
#Baringo, , Embu, Fort Hall, Kajiado, Kiambu, Kitui, Laikipia, Machakos, Meru, Naivasha, Nakuru, Nanyuki, Narok, Nyeri, Thika
condition <- events$document_district_clean %in% c("Baringo","Embu","Fort Hall","Kajiado","Kiambu",
                                                 "Laikipia","Machakos","Meru","Naivasha","Nakuru",
                                                  "Nanyuki","Narok","Nyeri","Thika"); table(condition)

condition FALSE TRUE 2808 7661

events$document_unit_type[condition] <- "District"
events$document_unit_type %>%
  janitor::tabyl(sort = TRUE) %>%
  janitor::adorn_crosstab(digits = 1)

‘janitor::adorn_crosstab’ is deprecated. Use ‘use the various adorn_ functions instead. See the “tabyl” vignette for examples.’ instead. See help(“Deprecated”)

events$document_district_clean  %>%
  janitor::tabyl(sort = TRUE) %>%
  janitor::adorn_crosstab(digits = 1)

Factor dat contains implicit NA, consider using forcats::fct_explicit_na‘janitor::adorn_crosstab’ is deprecated. Use ‘use the various adorn_ functions instead. See the “tabyl” vignette for examples.’ instead. See help(“Deprecated”)

Handle suffixes and directions

# Now we need to handle suffixes and combined locations
# "farm" now is followed by things because they crunched in additional location info at the end
## "coles estate farm
## agriculture experimental farm
## demonstration farm
## "farm near churo"
## reubens farm near churo
################################################
events$location_text_ruleclean <- events$location_text %>% stringr::str_trim() %>% tolower()
events <- events %>% 
          dplyr::select(-one_of("location_text_ruleclean_connector_prefix","location_text_ruleclean_connector_suffix")) %>%  #this intentionally throws a warning
          tidyr ::separate(col=location_text_ruleclean,
                    into=c("location_text_ruleclean_connector_prefix","location_text_ruleclean_connector_suffix"),
                    sep = " of | near ", remove=F, extra="drop", fill="right")
Unknown columns: `location_text_ruleclean_connector_prefix`, `location_text_ruleclean_connector_suffix`
events <- events %>% mutate(name_clean=stringr::str_trim(tolower(location_text))) %>%
           mutate(name_clean_posessive=grepl("'s|`s",name_clean)) %>%
           mutate(name_cleaner=trimws(name_clean)  ) %>%
           mutate(name_cleaner=gsub("'s|`s","",name_cleaner, fixed=T)  ) %>%
           mutate(name_cleaner= stringr::str_replace_all(name_cleaner, "[[:punct:]]|`", "")  ) %>% 
           mutate(name_cleaner= stringr::str_replace_all(name_cleaner, "[^[:alnum:] ]", "")  ) %>%  #removes all the weird unicode and ascii
           mutate(name_cleaner=trimws(name_cleaner)  ) %>%
           mutate(name_cleaner_nospace= stringr::str_replace_all(name_cleaner, " ", "") )

Create a Simple Features Version GIS Version

events_sf <- events %>% # filter(!is.na(longitude) & !is.na(latitude))  %>%
  distinct() %>%
  # filter( between(longitude, 30.0,45.0)  )  %>%  #Flag ROI but don't subset on it yet
  # filter( between(latitude, -5.0,5.0) ) %>%
  mutate(name_clean = stringr::str_trim(tolower(location_text))) %>%
  mutate(name_clean_posessive = grepl("'s|`s", name_clean)) %>%
  mutate(name_cleaner = trimws(name_clean)) %>%
  mutate(name_cleaner = gsub("'s|`s", "", name_cleaner, fixed = T)) %>%
  mutate(name_cleaner = stringr::str_replace_all(name_cleaner, "[[:punct:]]|`", "")) %>%
  mutate(name_cleaner = trimws(name_cleaner)) %>%
  mutate(name_cleaner_nospace = stringr::str_replace_all(name_cleaner, " ", ""))
# Avoid creating geometries where one of the two is NA
events_sf$map_coordinate_clean_longitude[is.na(events_sf$map_coordinate_clean_latitude)] <- NA
events_sf$map_coordinate_clean_latitude[is.na(events_sf$map_coordinate_clean_longitude)] <- NA
#events_sf$event_hash <- NULL #Make sure we're not hashing on the previous hash which might be a random walk
events_sf <- events_sf %>%
            sf::st_as_sf(coords = c("map_coordinate_clean_longitude", "map_coordinate_clean_latitude"),
                     crs = 4326, agr = "constant", remove = F, na.fail = F) # %>% 
             #mutate(event_hash = apply(., 1, digest, algo="xxhash64") ) #Do this once and only once
valid <- sf::st_is_valid(events_sf$geometry); table(valid)
valid
 TRUE 
10469 
eventsnames_sf <- events_sf %>% 
  select("name_cleaner", "geometry") %>% 
  setNames(c("name", "geometry")) %>% 
  mutate(source_dataset = "events")

Output Cleaned Files

saveRDS(events_sf, glue::glue(getwd(), "/../inst/extdata/events_sf.Rdata"))
