This is the entry point for the paper “Measuring the Landscape of Civil War.”
In this file, a raw csv file of the events dataset created for the Mau Mau rebellion is loaded and processed.
rm(list=ls()); gc() #clear objects from memory
library(MeasuringLandscape)
library(tidyverse) #load independently just to make sure %>% gets imported
knitr::opts_knit$set(progress = TRUE, verbose = TRUE)
knitr::opts_chunk$set(fig.width = 12, fig.height = 8, warning = FALSE, message = FALSE, cache = TRUE)
options(width = 160)
events <- MeasuringLandscape:::prep_events(fromscratch = F)
dim(events)
[1] 10469 25
Basic cleaning. Format is usually DD.MM.YYYY but sometimes multiple days are included by DD1/DD2/MM/YY. Sometimes year is YY or YYYY.
#p_load(date)
events <- events %>%
dplyr::mutate(event_date_clean=event_date) %>%
dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"[[:digit:]]+/", "")) %>%
dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"[[:digit:]]+/", "")) %>% #strip off extra day at the front 01/02.12.1950
dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"\\.", "/")) %>% #Convert periods to slashes
dplyr::mutate(event_date_clean=trimws(event_date_clean)) %>% #trim whitespace
dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"/52", "/1952")) %>% #convert 2 digit years to 4 digit years
dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"/53", "/1953")) %>%
dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"/54", "/1954")) %>%
dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"/55", "/1955")) %>%
dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"/56", "/1956")) %>%
dplyr::mutate(event_date_clean=stringr::str_replace_all(event_date_clean,"/19524", "/1954")) %>% #clean typo
dplyr::mutate(event_date_clean= lubridate::dmy(event_date_clean) ) #Feed to lubridate
67 failed to parse.
events %>% filter(is.na(event_date_clean)) %>% dplyr::select(starts_with("event_date")) %>% distinct() %>% print(n=40) #visualize errors
events$event_date_clean_year <- lubridate::year(events$event_date_clean)
events$event_date_clean_year %>% janitor::tabyl() %>% round(3)
. n percent valid_percent
953 1 0.000 0.000
1855 1 0.000 0.000
1952 157 0.015 0.020
1953 3025 0.289 0.384
1954 3165 0.302 0.402
1955 1330 0.127 0.169
1956 112 0.011 0.014
1957 27 0.003 0.003
1958 7 0.001 0.001
1985 1 0.000 0.000
1995 18 0.002 0.002
2005 1 0.000 0.000
2013 2 0.000 0.000
2023 1 0.000 0.000
2057 21 0.002 0.003
2058 8 0.001 0.001
2060 1 0.000 0.000
2061 1 0.000 0.000
NA 2590 0.247 NA
How often are event dates missing?
table(is.na(events$event_date))
FALSE TRUE
7946 2523
The documents also have dates, sometimes spanning a period of time. Can use that to nail down missing dates.
(events$document_date_type <- events$document_date %>%
tolower() %>%
mosaic::derivedFactor(
"unknown" = T,
"missing" = stringr::str_detect(.,"obscured|missing|illegible|xx|Document missing"),
"on the" = stringr::str_detect(.,"on the"),
"to" = stringr::str_detect(.," to"),
"for" = stringr::str_detect(.,"For "),
"week" = stringr::str_detect(.,"week"),
"week ending" = stringr::str_detect(.,"week ending"),
"period" = stringr::str_detect(.,"period"),
"fortnight" = stringr::str_detect(.,"fortnight"),
"ending" = stringr::str_detect(.,"ending"),
.method = "last",
.default = "unknown"
)
) %>% janitor::tabyl()
. n percent
0 0.000000000
unknown 877 0.083771134
missing 324 0.030948515
on the 101 0.009647531
to 1640 0.156652975
for 0 0.000000000
week 130 0.012417614
week ending 0 0.000000000
period 226 0.021587544
fortnight 562 0.053682300
ending 6609 0.631292387
events$document_date_clean <- events$document_date %>% tolower() %>%
stringr::str_replace_all("Fortnight Ended |period|week ending|for |the |fortnight |ending |week |From |on ","") %>%
stringr::str_replace_all("[Digits]*th|[Digits]*st|[Digits]*rd|[Digits]*nd","")
events <- events %>%
dplyr::select(-one_of("document_date_1","document_date_2")) %>% #separate will continue to add columns every time its run
tidyr ::separate(col=document_date_clean,
into=c("document_date_1","document_date_2"),
sep = " to|to |To | - ", remove=F, extra="drop", fill="right")
Unknown columns: `document_date_1`, `document_date_2`
events$document_date_clean_1 <- events$document_date_1 %>%
stringr::str_replace_all("[[:digit:]]+/", "") %>% #strip off extra day at the front 01/02.12.1950
stringr::str_replace_all("\\.", "/") %>% #Convert periods to slashes
trimws() %>%
lubridate::dmy()
2356 failed to parse.
events$document_date_clean_2 <- events$document_date_2 %>%
stringr::str_replace_all("[[:digit:]]+/", "") %>% #strip off extra day at the front 01/02.12.1950
stringr::str_replace_all("\\.", "/") %>% #Convert periods to slashes
trimws() %>%
lubridate::dmy()
257 failed to parse.
events %>% filter(is.na(document_date_clean_1)) %>% dplyr::select(starts_with("document_date")) %>% distinct() %>% print(n=40) #visualize errors
events$document_date_best_date <- events$document_date_clean_2
condition <- is.na(events$document_date_best_date)
events$document_date_best_date[condition] <- events$document_date_clean_1[condition]
(events$document_date_best_year <- lubridate::year(events$document_date_best_date)) %>% janitor::tabyl() %>% round(3)
. n percent valid_percent
1952 101 0.010 0.011
1953 3275 0.313 0.348
1954 3751 0.358 0.398
1955 1892 0.181 0.201
1956 281 0.027 0.030
1957 68 0.006 0.007
1958 26 0.002 0.003
1959 4 0.000 0.000
1960 4 0.000 0.000
1961 4 0.000 0.000
2052 10 0.001 0.001
NA 1053 0.101 NA
cat("\014")
events$type_clean <- stringr::str_trim(stringi::stri_trans_totitle(events$type))
events$type_clean_agglow <- events$type_clean %>%
stringr::str_trim() %>%
tolower() %>%
forcats::fct_collapse( #removed Car dependency for forcats
'desertion'='desertion',
'escape'='escape',
'abduction'=c('abduction','kidnapping','kidnap','kitnap','kindnap'),
'assault'=c('assault','attack','assaulted','assaults','assualt','assult'),
'murder'=c('murder','elimination','kidnap / murder',''),
'arson'=c('arson','burn'),
'cattle slashing'=c('slashed','stampede'),
'vandalism'='vandalism',
'theft'=c('theft','thefts','thet','missing','lost','entry'),
'punishment'=c('confiscate','sentenced'),
'rebel capture'=c('capture','captured'),
'oathing'=c('oath','oathing','recruitment','recruited'),
'contact'=c('contact','caontact','contacts','drove off','drive off','drove off',
'chased off','broke up oathing','ambush'),
'patrol'=c('patrol','police and kpr patrol','sweep'),
'screening'=c('screening','sreening'),
'unclassified'=c('type')
)
Unknown levels in f
: kidnapping, , recruitment, patrol
events$type_clean_agglow %>%
janitor::tabyl(sort = TRUE) %>%
janitor::adorn_crosstab(.,digits = 1)
Factor dat
contains implicit NA, consider using forcats::fct_explicit_na
‘janitor::adorn_crosstab’ is deprecated. Use ‘use the various adorn_ functions instead. See the “tabyl” vignette for examples.’ instead. See help(“Deprecated”)
(events$type_clean_aggmed <- events$type_clean_agglow %>% forcats::fct_collapse(
'physical violence'=c('abduction','assault','murder'),
'property destruction'=c('vandalism','arson','cattle slashing'),
'theft'=c('theft'),
'security operations'=c('contact','screening','sreening','patrol','punishment'),
'unclassified'=c('desertion','escape','unclassified')
)
) %>%
janitor::tabyl(sort = TRUE) %>%
janitor::adorn_crosstab(digits = 1)
Unknown levels in `f`: sreeningFactor `dat` contains implicit NA, consider using `forcats::fct_explicit_na`'janitor::adorn_crosstab' is deprecated.
Use 'use the various adorn_ functions instead. See the "tabyl" vignette for examples.' instead.
See help("Deprecated")
NA
(events$type_clean_agghigh <- events$type_clean_aggmed %>% forcats::fct_collapse(
'rebel activity'=c('oathing','physical violence','property destruction','theft'),
'government activity'=c('rebel capture','security operations')
)
) %>%
janitor::tabyl(sort = TRUE) %>%
janitor::adorn_crosstab(digits = 1)
Factor `dat` contains implicit NA, consider using `forcats::fct_explicit_na`'janitor::adorn_crosstab' is deprecated.
Use 'use the various adorn_ functions instead. See the "tabyl" vignette for examples.' instead.
See help("Deprecated")
Collapsed Initiators to just Rebels, Government, and Civilians
cat("\014")
initiator_target_master_clean <- list(
'ammunition'=c('ammunition'),
'explosives'=c('explosives', 'gelignite'),
'firearms'=c('firearms','arms', 'firearm', 'gun', 'pistol', 'rifle', 'ammunition', 'rifile', 'shotgun', 'verey pistol'),
'other weapons'=c('other weapons','axe','scabbard','weapons'),
'colonial authorities'=c('colonial authorities','councillor', 'district commissioner', 'district officer', 'forest ranger', 'game ranger', 'game warden', 'government', 'government employees', 'port authority', 'public works department', 'screening team' , 'do', 'govrnment', 'wakamba screening team', 'do munuga','african do','dcmeru', 'colonial authorities' ,'govtemployee' ),
'tribal authorities'=c('tribal authorities','chief', 'elders', 'headman' , 'chief chostram','chief eliud', "chief's sentry") ,
'private property'=c('private property','buildings', 'cattle dip', 'duka', 'farms', 'garage', 'homes','huts', 'hotel', 'land rover', 'lorry', 'market', 'office', 'oxcart', 'property', 'pump house', 'sawmill', 'shops', 'stores', 'tractor', 'vehicle', 'windmill' , "bullock's farm",'cattle boma','coffe trees','coffee trees', 'cuthouse','dairy farm','dip','house','household', 'houses','hut','instrument','labour camp post','labour huts','lorries','lucerne sheds','maize shamba', 'milk factory','pig sty','private property', 'property of civilians','shop','store','thika fishing camp','vehicles'),
'cash'=c('cash', 'funds', 'money' , "conductor's takings"),
'food'=c('banana', 'barley', 'bran', 'cabbage', 'coffee', 'corn', 'cream', 'crops', 'dairy', 'food', 'fruit', 'grain', 'honey', 'maize', 'meat', 'milk', 'oats', 'posho', 'potatoes', 'sugar', 'vegetable', 'wheat',
'food','food etc','food store','food stores','foodstuffs','fruits','grains', 'grains+cloth +money','green maize cobs','potato','potato store','potatos','skimmed milk','sugar cane','sugar maize','vegetables','vegitable garden', 'vegitables','wheat bags','wheat store','wheet','whisky') ,
'livestock'=c('beast', 'cattle', 'cow', 'herd', 'livestock', 'pig', 'sheep', 'steer', 'stock',
'animal', 'bulls','calf','calves','chicken','cows','donkey','goat','goats',
'head of cattle','head of cow','head of sheep','heifer','heifers',
'lamb','live stock','livestock','livestocks','masai herd','milk cow','ox','ox cart',
'oxen','ram','red poll cattle','shee','sheep or ox','steers','stocks' ),
'medicine'=c('medical supplies', 'medicine', 'm&b tablets', 'medicines'),
'supplies'=c('supplies','bags', 'bedding', 'blankets', 'books', 'charcoal', 'cloth', 'clothing', 'cooking utensils', 'cutlery', 'equipment', 'farm implements',
'household items','instruments', 'iron', 'pails','petrol', 'provisions', 'oil', 'sacks', 'supplies', 'tarpaulin', 'thatch', 'timber',
'tobacco', 'tools', 'uniforms', 'wire', 'wireless set', 'whiskey', 'articles','bag','battery','bucket','ciga','cigarettes','clothes',
'clothing etc','cloths','dairy item','dairy record book','goods', 'material','oil+tins','provisionv','railway uniforms','supplies', 'tarpaulian','typewriter','v- drive belts', 'gunny bags' ),
'church'=c('church'),
'infrastructure'=c('airstrip', 'bridges', 'half built village', 'roads', 'trenches', 'water tank','bridge', 'bridge broken', 'bridge damaged', 'infrastructure', 'milt property', 'miltproperty', 'prison camp','stn damaged'),
'school'=c('school', 'school','school building','school house','school property','schools'),
'home guard'=c('bg','kg','eg', 'guard','embu guard', 'farm guard', 'forest guard', 'home guard','ikandine guard', 'kathanjure guard', 'kijabe guard',
'kikuyu guard', 'masai guard', 'meru guard', 'nandi guard', 'nkubu guard', 'stock guard', 'tigoni guard','tp and eg patrol','hg','tp patrol','home guard patrol',
'm', 'm/g','m/g patrol','g', 'kathanjure hg','k g', 'ng', 'eg patrol', 'hg camp','hg leader','hg patrol','hg post','home','home guard','kg post'),
'arab combat units'=c('arab combat' , 'arab combat unit','arab combat units'),
'asian combat units'=c('asian combat', 'asian combat unit', 'asian combat team', 'second asian combat unit','asian combat units' ),
'kings african rifles'=c('kings african rifles','3 kar', '4 kar', '5 kar', '6 kar', '7 kar', '23 kar', '26 kar','k.a.r','k.p.r','k.a.r.', '5th k.a.r','5kar','5 k.a.r','4th kar','kar' ) ,
'british military'=c('british military', 'devonshire regiment','devons', 'field intelligence assistant', 'field intelligence officer', 'fio', 'gloucestershire regiment', 'glosters', 'lancashire fusiliers',
"king's shropshire light infantry", 'royal east kent regiment', 'buffs', 'royal fusiliers', 'royal highland regiment','black watch', 'watch', 'royal inniskilling fusiliers', 'royal irish fusiliers', 'royal northumberland fusiliers', 'rnf','police and military', 'army' , 'lancashire fusilliers', 'sp company 1 royal innisks', '1 rnf', 'rif', 'ksli', 'inniskillings', 'fia','1 glosters', '1 bw', '1 buffs',
'\"a\" company 1 royal innisks', '\"a\" company', 'royal fusilers', 'of devons','of 1 glosters', 'lanc fus', 'fusiliers', 'fio kruger','fios','a co devon','4 platoon support company',
'\"c\" company1 royal innisks','6 platoonsp company 1 royal innisks','1 lf', '\"c\" company', '\"d\" company','\"a\"','\"a\" company bw','buffs ambush','d company',
"d' force",'devens', 'c company','\"d\" force', 'army officer', 'british army officer', 'british military', 'buffs patrol', 'european officer', 'european soldiers', 'gloster patrol' ),
'kenya regiment'=c('kenya regiment','captain folliott’s team' , 'kr', 'kenreg', 'kenregg','kenya regiment sergeant', 'kenya regt','keniya regiment','kenya regiment private'),
'military (generic)'=c('military (generic)', 'captain', 'company', 'military', 'army', 'military property', 'platoon', 'security forces', 'security force', 'coy', 'striking force' ,'sentry', 'non commissioned officers', 'patrol', 'sentrie', 'sgt white' ),
'psuedo gangs'=c('psuedo gangs','pseudo gang', 'pseudo team', 'trojan', 'psuedo gangs', 'trojan team' , 'tracker group', 'pseudo teams'),
'royal air force'=c('royal air force','raf', 'bombers', 'air strike', 'harvards', 'raf lincolns','flying squard'),
'paramilitary'=c('paramilitary','general service unit', 'gsu' ),
'cid'=c('cid'),
'kenya police'=c('kenya police', 'kp' , "kp constables' quarters", 'kpa' ),
'kenya police reserve'=c('kenya police reserve', 'kpr', 'kpr officers', 'reserve police officer', 'rpo' , 'rpos', 'police and k.p.r'),
'police (generic)'=c('police (generic)','constable', 'police', 'polce','policy party'),
'railway police'=c('railway police' ),
'special branch'=c('special branch', 'blue doctor team', 'special branch team', 'sb officers' ),
'tribal police'=c('githumu police', 'masai special constable', 'tribal police', 'tp' , 'tpeg','african constable', 'african costable', 'african special constable', 'tribal police'),
'tribal police reserve'=c('tribal police reserve', 'tpr') ,
'communities'=c('communities','manyatta', 'fishing camp', 'sublocation', 'village', 'camp' , 'villages') ,
'detainees'=c('detainees', 'prisoner', 'prisoners'),
'suspected insurgents'=c('suspected insurgents','bandits', 'food foragers', 'gangs', 'gang', 'kiama kia muingi' , 'kkm', 'komerera' , 'mau mau', 'oath administrator', 'passive wing',
'rebels', 'suspects', 'terrorists','terrorosts','terrorist', 'gunman', 'terorist', 'gunmen', 'resistance group','resistance groups', 'oath administrater','oath administrators','passive wing members', 'resistance','suspect', 'suspected insurgents','terroist','terroists','terrost') ,
'civilians'=c('civilians','africans', 'children', 'civilian', 'driver', 'employees', 'evangelist', 'family', 'farm boys', 'girls', 'informer',
'kikuyu', 'laborour', 'loyalist', 'masai', 'men', 'mission staff', 'owner', 'passengers', 'people', 'tugen tribesmen' , 'stranger', 'sikh',
'herd boys', 'isiolo game scouts', 'farm labour', 'farmer', 'european', 'employer', 'employee', 'civilan','shopkeeper' , 'students', 'teachers',
'turkana', 'vigilantes', 'women', 'workers','villagers', 'labour', 'local labour', 'kikuyus', 'embu', 'tiriki houseboy', 'samburu', 'manager', 'woman',
'vetofficer', 'mrhiggins', 'masai party','kuria tribesmen','manager of akira estates', 'kuria tribesmen','chstephen','african',
'catholic misson staff', 'african staff', 'asian women', 'bus conductor', 'child', 'civilian(food carriers)', 'civilian(schoolmaster)', 'civilians',
'civilion', 'committee', 'committee member', 'courier','elder','embu tractor driver', 'employees of club','engine boy','girl','golf club staff','his own hut',
'hotel keeper','houseboy','illegal residents','indian','interpreter','kem','kikiyu', 'kikuyu assessor','kikuyu families','kikuyu houseboy','kikuyu labourer','kikyu',
'kirua village','labour line','labour lines','labourer','labourers', 'laboures','labourline','labours','males','man','maragoli','maragoli labourer',
'masai elders','masai tribesman','members of the thika committee', 'mna section leaders','municipal inspectors','non kikuyu employees','person',
'prostitutes','purke masai','pwd employee','railway employees', 'school master','school teacher','sisters committee','somali','staff','strangers',
'taxi drivers','teacher','treasurers', "headman's son","norton traill's labour","gordon's labour", 'food carriers')
)
regex <- "\\.|patrol|[1-9]\\s*rd|[1-9]\\s*th" # with regex start trying to get more of these to automatically map instead of generating lots of hand codings
events$initiator_clean <- events$initiator %>% stringr::str_trim() %>% gsub(regex, "", ., ignore.case =T) %>% tolower()
events <- events %>%
dplyr::select(-one_of("initiator_clean_1", "initiator_clean_2", "initiator_clean_3")) %>% # separate will continue to add columns every time its run
tidyr ::separate(
col = initiator_clean,
into = c("initiator_clean_1", "initiator_clean_2", "initiator_clean_3"),
sep = "and|\\\\|/|\\&|,", remove = F, extra = "drop", fill = "right"
)
Unknown columns: `initiator_clean_1`, `initiator_clean_2`, `initiator_clean_3`
events <- events %>%
mutate_at(vars(starts_with("initiator_clean_")), funs(gsub(".*police.*", "police", ., ignore.case =T))) %>%
mutate_at(vars(starts_with("initiator_clean_")), funs(gsub(".*guard.*", "guard", ., ignore.case =T))) %>%
mutate_at(vars(starts_with("initiator_clean_")), funs(gsub(".*terror.*|.*mau mau.*|.*gang.*", "terrorist", ., ignore.case =T))) %>%
mutate_at(vars(starts_with("initiator_clean_")), funs(gsub(".*kpr.*|.*k p r.*", "kpr", ., ignore.case =T))) %>%
mutate_at(vars(starts_with("initiator_clean_")), funs(gsub(".*kar.*|.*k a r.*", "kar", ., ignore.case =T))) %>%
mutate_at(vars(starts_with("initiator_clean_")), funs(gsub(".*coy.*", "coy", ., ignore.case =T))) %>%
mutate_at(vars(starts_with("initiator_clean_")), funs(gsub(".*gsu.*", "gsu", ., ignore.case =T))) %>%
mutate_at(vars(starts_with("initiator_clean_")), funs(gsub(".*watch.*", "watch", ., ignore.case =T))) %>%
mutate_at(vars(starts_with("initiator_clean_")), funs(trimws(.)))
events <- events %>%
mutate(initiator_clean_1_agglow = do.call(forcats::fct_collapse, c(list(events$initiator_clean_1), initiator_target_master_clean)) %>% as.character() ) %>%
mutate(initiator_clean_2_agglow = do.call(forcats::fct_collapse, c(list(events$initiator_clean_2), initiator_target_master_clean)) %>% as.character()) %>%
mutate(initiator_clean_3_agglow = do.call(forcats::fct_collapse, c(list(events$initiator_clean_3), initiator_target_master_clean)) %>% as.character())
Unknown levels in `f`: ammunition, explosives, gelignite, firearms, arms, firearm, gun, pistol, rifle, ammunition, rifile, shotgun, verey pistol, other weapons, axe, scabbard, weapons, colonial authorities, councillor, district commissioner, district officer, government employees, port authority, public works department, colonial authorities, govtemployee, tribal authorities, elders, chief's sentry, private property, buildings, cattle dip, duka, farms, garage, homes, huts, hotel, land rover, lorry, market, office, oxcart, property, pump house, sawmill, shops, stores, tractor, vehicle, windmill, bullock's farm, cattle boma, coffe trees, coffee trees, cuthouse, dairy farm, dip, house, household, houses, hut, instrument, labour camp post, labour huts, lorries, lucerne sheds, maize shamba, milk factory, pig sty, private property, property of civilians, shop, store, thika fishing camp, vehicles, cash, funds, money, conductor's takings, banana, barley, bran, cabbage, coffee, corn, cream, crops, dairy, food, fruit, grain, honey, maize, meat, milk, oats, posho, potatoes, sugar, vegetable, wheat, food, food etc, food store, food stores, foodstuffs, fruits, grains, grains+cloth +money, green maize cobs, potato, potato store, potatos, skimmed milk, sugar cane, sugar maize, vegetables, vegitable garden, vegitables, wheat bags, wheat store, wheet, whisky, beast, cattle, cow, herd, livestock, pig, sheep, steer, stock, animal, bulls, calf, calves, chicken, cows, donkey, goat, goats, head of cattle, head of cow, head of sheep, heifer, heifers, lamb, live stock, livestock, livestocks, masai herd, milk cow, ox, ox cart, oxen, ram, red poll cattle, shee, sheep or ox, steers, stocks, medical supplies, medicine, m&b tablets, medicines, supplies, bags, bedding, blankets, books, charcoal, cloth, clothing, cooking utensils, cutlery, equipment, farm implements, household items, instruments, iron, pails, petrol, provisions, oil, sacks, supplies, tarpaulin, thatch, timber, tobacco, tools, uniforms, wire, wireless set, whiskey, articles, bag, battery, bucket, ciga, cigarettes, clothes, clothing etc, cloths, dairy item, dairy record book, goods, material, oil+tins, provisionv, railway uniforms, supplies, tarpaulian, typewriter, v- drive belts, gunny bags, church, airstrip, bridges, half built village, roads, trenches, water tank, bridge, bridge broken, bridge damaged, infrastructure, milt property, miltproperty, prison camp, stn damaged, school, school, school building, school house, school property, schools, embu guard, farm guard, forest guard, home guard, ikandine guard, kathanjure guard, kijabe guard, kikuyu guard, masai guard, meru guard, nandi guard, nkubu guard, stock guard, tigoni guard, tp and eg patrol, tp patrol, home guard patrol, m/g, m/g patrol, g, eg patrol, hg camp, hg leader, hg patrol, hg post, home, home guard, kg post, arab combat, arab combat units, asian combat, asian combat units, kings african rifles, 3 kar, 4 kar, 5 kar, 6 kar, 7 kar, 23 kar, 26 kar, k.a.r, k.p.r, k.a.r., 5th k.a.r, 5kar, 5 k.a.r, 4th kar, british military, devonshire regiment, field intelligence assistant, field intelligence officer, gloucestershire regiment, glosters, king's shropshire light infantry, royal east kent regiment, royal fusiliers, royal highland regiment, black watch, royal inniskilling fusiliers, royal northumberland fusiliers, rnf, police and military, army officer, british army officer, british military, buffs patrol, european officer, european soldiers, gloster patrol, captain folliott’s team, kenya regiment private, military (generic), captain, military property, platoon, non commissioned officers, patrol, sentrie, sgt white, psuedo gangs, pseudo gang, psuedo gangs, royal air force, paramilitary, kenya police, kp, kp constables' quarters, kpa, kenya police reserve, kpr officers, reserve police officer, police and k.p.r, police (generic), railway police, githumu police, masai special constable, tribal police, african constable, african costable, african special constable, tribal police, tribal police reserve, communities, manyatta, fishing camp, sublocation, village, camp, villages, detainees, prisoner, prisoners, suspected insurgents, bandits, food foragers, gangs, gang, kiama kia muingi, kkm, komerera, mau mau, oath administrator, passive wing, suspects, terrorists, terrorosts, oath administrater, oath administrators, passive wing members, resistance, suspect, suspected insurgents, terroist, terroists, terrost, africans, children, driver, employees, evangelist, family, farm boys, girls, informer, laborour, loyalist, mission staff, passengers, people, shopkeeper, students, teachers, vigilantes, women, workers, catholic misson staff, african staff, asian women, bus conductor, child, civilian(food carriers), civilian(schoolmaster), civilion, committee, committee member, courier, elder, embu tractor driver, employees of club, engine boy, girl, golf club staff, his own hut, hotel keeper, houseboy, illegal residents, indian, interpreter, kem, kikiyu, kikuyu assessor, kikuyu families, kikuyu houseboy, kikuyu labourer, kikyu, kirua village, labour line, labour lines, labourer, labourers, laboures, labourline, labours, males, man, maragoli, maragoli labourer, masai elders, masai tribesman, members of the thika committee, mna section leaders, municipal inspectors, non kikuyu employees, person, prostitutes, purke masai, pwd employee, railway employees, school master, school teacher, sisters committee, somali, staff, strangers, taxi drivers, teacher, treasurers, headman's son, gordon's labour, food carriersUnknown levels in `f`: ammunition, explosives, gelignite, firearms, arms, firearm, gun, pistol, rifle, ammunition, rifile, shotgun, verey pistol, other weapons, axe, scabbard, weapons, colonial authorities, councillor, district commissioner, district officer, forest ranger, game ranger, game warden, government employees, port authority, public works department, screening team, govrnment, wakamba screening team, do munuga, african do, dcmeru, colonial authorities, govtemployee, tribal authorities, elders, headman, chief chostram, chief eliud, chief's sentry, private property, buildings, cattle dip, duka, farms, garage, homes, huts, hotel, land rover, lorry, market, office, oxcart, property, pump house, sawmill, shops, stores, tractor, vehicle, windmill, bullock's farm, cattle boma, coffe trees, coffee trees, cuthouse, dairy farm, dip, house, household, houses, hut, instrument, labour camp post, labour huts, lorries, lucerne sheds, maize shamba, milk factory, pig sty, private property, property of civilians, shop, store, thika fishing camp, vehicles, cash, funds, money, conductor's takings, banana, barley, bran, cabbage, coffee, corn, cream, crops, dairy, food, fruit, grain, honey, maize, meat, milk, oats, posho, potatoes, sugar, vegetable, wheat, food, food etc, food store, food stores, foodstuffs, fruits, grains, grains+cloth +money, green maize cobs, potato, potato store, potatos, skimmed milk, sugar cane, sugar maize, vegetables, vegitable garden, vegitables, wheat bags, wheat store, wheet, whisky, beast, cattle, cow, herd, livestock, pig, sheep, steer, stock, animal, bulls, calf, calves, chicken, cows, donkey, goat, goats, head of cattle, head of cow, head of sheep, heifer, heifers, lamb, live stock, livestock, livestocks, masai herd, milk cow, ox, ox cart, oxen, ram, red poll cattle, shee, sheep or ox, steers, stocks, medical supplies, medicine, m&b tablets, medicines, supplies, bags, bedding, blankets, books, charcoal, cloth, clothing, cooking utensils, cutlery, equipment, farm implements, household items, instruments, iron, pails, petrol, provisions, oil, sacks, supplies, tarpaulin, thatch, timber, tobacco, tools, uniforms, wire, wireless set, whiskey, articles, bag, battery, bucket, ciga, cigarettes, clothes, clothing etc, cloths, dairy item, dairy record book, goods, material, oil+tins, provisionv, railway uniforms, supplies, tarpaulian, typewriter, v- drive belts, gunny bags, church, airstrip, bridges, half built village, roads, trenches, water tank, bridge, bridge broken, bridge damaged, infrastructure, milt property, miltproperty, prison camp, stn damaged, school, school, school building, school house, school property, schools, bg, embu guard, farm guard, forest guard, home guard, ikandine guard, kathanjure guard, kijabe guard, kikuyu guard, masai guard, meru guard, nandi guard, nkubu guard, stock guard, tigoni guard, tp and eg patrol, tp patrol, home guard patrol, m/g, m/g patrol, kathanjure hg, k g, ng, eg patrol, hg camp, hg leader, hg patrol, hg post, home, home guard, kg post, arab combat, arab combat unit, arab combat units, asian combat, asian combat team, second asian combat unit, asian combat units, kings african rifles, 3 kar, 4 kar, 5 kar, 6 kar, 7 kar, 23 kar, 26 kar, k.a.r, k.p.r, k.a.r., 5th k.a.r, 5kar, 5 k.a.r, 4th kar, british military, devonshire regiment, devons, field intelligence assistant, field intelligence officer, fio, gloucestershire regiment, glosters, lancashire fusiliers, king's shropshire light infantry, royal east kent regiment, royal fusiliers, royal highland regiment, black watch, royal inniskilling fusiliers, royal irish fusiliers, royal northumberland fusiliers, rnf, police and military, lancashire fusilliers, sp company 1 royal innisks, 1 rnf, rif, ksli, inniskillings, 1 glosters, 1 bw, 1 buffs, "a" company 1 royal innisks, "a" company, royal fusilers, of devons, of 1 glosters, lanc fus, fusiliers, fio kruger, fios, a co devon, 4 platoon support company, "c" company1 royal innisks, 6 platoonsp company 1 royal innisks, 1 lf, "c" company, "d" company, "a", "a" company bw, buffs ambush, d company, d' force, devens, c company, "d" force, army officer, british army officer, british military, buffs patrol, european officer, european soldiers, gloster patrol, captain folliott’s team, kenreg, kenregg, kenya regiment sergeant, keniya regiment, kenya regiment private, military (generic), captain, company, military property, platoon, security forces, security force, striking force, sentry, non commissioned officers, patrol, sentrie, sgt white, psuedo gangs, pseudo gang, pseudo team, trojan, psuedo gangs, trojan team, tracker group, pseudo teams, royal air force, raf, bombers, air strike, harvards, raf lincolns, flying squard, paramilitary, general service unit, cid, kenya police, kp constables' quarters, kpa, kenya police reserve, kpr officers, reserve police officer, rpo, rpos, police and k.p.r, police (generic), constable, polce, policy party, railway police, special branch, blue doctor team, special branch team, sb officers, githumu police, masai special constable, tribal police, tpeg, african constable, african costable, african special constable, tribal police, tribal police reserve, communities, manyatta, fishing camp, sublocation, village, camp, villages, detainees, prisoner, prisoners, suspected insurgents, bandits, food foragers, gangs, gang, kiama kia muingi, kkm, komerera, mau mau, oath administrator, passive wing, rebels, suspects, terrorists, terrorosts, gunman, terorist, gunmen, resistance group, resistance groups, oath administrater, oath administrators, passive wing members, resistance, suspect, suspected insurgents, terroist, terroists, terrost, africans, children, civilian, driver, employees, evangelist, family, farm boys, girls, informer, kikuyu, laborour, loyalist, masai, men, mission staff, owner, passengers, people, tugen tribesmen, stranger, sikh, herd boys, isiolo game scouts, farm labour, farmer, european, employer, employee, civilan, shopkeeper, students, teachers, turkana, vigilantes, women, workers, villagers, local labour, kikuyus, embu, tiriki houseboy, samburu, manager, woman, vetofficer, mrhiggins, masai party, kuria tribesmen, manager of akira estates, kuria tribesmen, chstephen, african, catholic misson staff, african staff, asian women, bus conductor, child, civilian(food carriers), civilian(schoolmaster), civilion, committee, committee member, courier, elder, embu tractor driver, employees of club, engine boy, girl, golf club staff, his own hut, hotel keeper, houseboy, illegal residents, indian, interpreter, kem, kikiyu, kikuyu assessor, kikuyu families, kikuyu houseboy, kikuyu labourer, kikyu, kirua village, labour line, labour lines, labourer, labourers, laboures, labourline, labours, males, man, maragoli, maragoli labourer, masai elders, masai tribesman, members of the thika committee, mna section leaders, municipal inspectors, non kikuyu employees, person, prostitutes, purke masai, pwd employee, railway employees, school master, school teacher, sisters committee, somali, staff, strangers, taxi drivers, teacher, treasurers, headman's son, norton traill's labour, gordon's labour, food carriersUnknown levels in `f`: ammunition, explosives, gelignite, firearms, arms, firearm, gun, pistol, rifle, ammunition, rifile, shotgun, verey pistol, other weapons, axe, scabbard, weapons, colonial authorities, councillor, district commissioner, district officer, forest ranger, game ranger, game warden, government, government employees, port authority, public works department, screening team, do, govrnment, wakamba screening team, do munuga, african do, dcmeru, colonial authorities, govtemployee, tribal authorities, chief, elders, headman, chief chostram, chief eliud, chief's sentry, private property, buildings, cattle dip, duka, farms, garage, homes, huts, hotel, land rover, lorry, market, office, oxcart, property, pump house, sawmill, shops, stores, tractor, vehicle, windmill, bullock's farm, cattle boma, coffe trees, coffee trees, cuthouse, dairy farm, dip, house, household, houses, hut, instrument, labour camp post, labour huts, lorries, lucerne sheds, maize shamba, milk factory, pig sty, private property, property of civilians, shop, store, thika fishing camp, vehicles, cash, funds, money, conductor's takings, banana, barley, bran, cabbage, coffee, corn, cream, crops, dairy, food, fruit, grain, honey, maize, meat, milk, oats, posho, potatoes, sugar, vegetable, wheat, food, food etc, food store, food stores, foodstuffs, fruits, grains, grains+cloth +money, green maize cobs, potato, potato store, potatos, skimmed milk, sugar cane, sugar maize, vegetables, vegitable garden, vegitables, wheat bags, wheat store, wheet, whisky, beast, cattle, cow, herd, livestock, pig, sheep, steer, stock, animal, bulls, calf, calves, chicken, cows, donkey, goat, goats, head of cattle, head of cow, head of sheep, heifer, heifers, lamb, live stock, livestock, livestocks, masai herd, milk cow, ox, ox cart, oxen, ram, red poll cattle, shee, sheep or ox, steers, stocks, medical supplies, medicine, m&b tablets, medicines, supplies, bags, bedding, blankets, books, charcoal, cloth, clothing, cooking utensils, cutlery, equipment, farm implements, household items, instruments, iron, pails, petrol, provisions, oil, sacks, supplies, tarpaulin, thatch, timber, tobacco, tools, uniforms, wire, wireless set, whiskey, articles, bag, battery, bucket, ciga, cigarettes, clothes, clothing etc, cloths, dairy item, dairy record book, goods, material, oil+tins, provisionv, railway uniforms, supplies, tarpaulian, typewriter, v- drive belts, gunny bags, church, airstrip, bridges, half built village, roads, trenches, water tank, bridge, bridge broken, bridge damaged, infrastructure, milt property, miltproperty, prison camp, stn damaged, school, school, school building, school house, school property, schools, bg, eg, embu guard, farm guard, forest guard, home guard, ikandine guard, kathanjure guard, kijabe guard, kikuyu guard, masai guard, meru guard, nandi guard, nkubu guard, stock guard, tigoni guard, tp and eg patrol, tp patrol, home guard patrol, m, m/g, m/g patrol, g, kathanjure hg, k g, ng, eg patrol, hg camp, hg leader, hg patrol, hg post, home, home guard, kg post, arab combat, arab combat unit, arab combat units, asian combat, asian combat unit, asian combat team, second asian combat unit, asian combat units, kings african rifles, 3 kar, 4 kar, 5 kar, 6 kar, 7 kar, 23 kar, 26 kar, k.a.r, k.p.r, k.a.r., 5th k.a.r, 5kar, 5 k.a.r, 4th kar, british military, devonshire regiment, devons, field intelligence assistant, field intelligence officer, fio, gloucestershire regiment, glosters, lancashire fusiliers, king's shropshire light infantry, royal east kent regiment, buffs, royal fusiliers, royal highland regiment, black watch, watch, royal inniskilling fusiliers, royal irish fusiliers, royal northumberland fusiliers, rnf, police and military, army, lancashire fusilliers, sp company 1 royal innisks, 1 rnf, rif, ksli, inniskillings, 1 glosters, 1 bw, 1 buffs, "a" company 1 royal innisks, "a" company, royal fusilers, of devons, of 1 glosters, lanc fus, fusiliers, fio kruger, fios, a co devon, 4 platoon support company, "c" company1 royal innisks, 6 platoonsp company 1 royal innisks, 1 lf, "c" company, "d" company, "a", "a" company bw, buffs ambush, d company, d' force, devens, c company, "d" force, army officer, british army officer, british military, buffs patrol, european officer, european soldiers, gloster patrol, kenya regiment, captain folliott’s team, kr, kenreg, kenregg, kenya regiment sergeant, kenya regt, keniya regiment, kenya regiment private, military (generic), captain, company, army, military property, platoon, security forces, security force, coy, striking force, sentry, non commissioned officers, patrol, sentrie, sgt white, psuedo gangs, pseudo gang, pseudo team, trojan, psuedo gangs, trojan team, tracker group, pseudo teams, royal air force, raf, bombers, air strike, harvards, raf lincolns, flying squard, paramilitary, general service unit, gsu, cid, kenya police, kp, kp constables' quarters, kpa, kenya police reserve, kpr officers, reserve police officer, rpo, rpos, police and k.p.r, police (generic), constable, polce, policy party, railway police, special branch, blue doctor team, special branch team, sb officers, githumu police, masai special constable, tribal police, tpeg, african constable, african costable, african special constable, tribal police, tribal police reserve, tpr, communities, manyatta, fishing camp, sublocation, village, camp, villages, detainees, prisoner, prisoners, suspected insurgents, bandits, food foragers, gangs, gang, kiama kia muingi, kkm, komerera, mau mau, oath administrator, passive wing, rebels, suspects, terrorists, terrorosts, terrorist, gunman, terorist, gunmen, resistance group, resistance groups, oath administrater, oath administrators, passive wing members, resistance, suspect, suspected insurgents, terroist, terroists, terrost, africans, children, civilian, driver, employees, evangelist, family, farm boys, girls, informer, laborour, loyalist, masai, men, mission staff, owner, passengers, people, tugen tribesmen, stranger, sikh, herd boys, isiolo game scouts, farm labour, farmer, european, employer, employee, civilan, shopkeeper, students, teachers, turkana, vigilantes, women, workers, villagers, labour, local labour, kikuyus, embu, tiriki houseboy, samburu, manager, woman, vetofficer, mrhiggins, masai party, kuria tribesmen, manager of akira estates, kuria tribesmen, chstephen, african, catholic misson staff, african staff, asian women, bus conductor, child, civilian(food carriers), civilian(schoolmaster), civilion, committee, committee member, courier, elder, embu tractor driver, employees of club, engine boy, girl, golf club staff, his own hut, hotel keeper, houseboy, illegal residents, indian, interpreter, kem, kikiyu, kikuyu assessor, kikuyu families, kikuyu houseboy, kikuyu labourer, kikyu, kirua village, labour line, labour lines, labourer, labourers, laboures, labourline, labours, males, man, maragoli, maragoli labourer, masai elders, masai tribesman, members of the thika committee, mna section leaders, municipal inspectors, non kikuyu employees, person, prostitutes, purke masai, pwd employee, railway employees, school master, school teacher, sisters committee, somali, staff, strangers, taxi drivers, teacher, treasurers, headman's son, norton traill's labour, gordon's labour, food carriers
#https://community.rstudio.com/t/using-list-argument-with-fct-collapse/6552/7
# sort(table(events$initiator_clean_1_agglow))
lowlevelagg <- c(
"arab combat units", "cid", "psuedo gangs", "asian combat units", "special branch",
"tribal authorities", "tribal police reserve", "royal air force",
"paramilitary", "kenya regiment", "tribal police", "kenya police reserve", "kenya police",
"british military", "civilians", "Kings African Rifles", "military (generic)", "police (generic)",
"railway police", "home guard", "colonial authorities", "suspected insurgents"
)
#events <- events %>%
# mutate(initiator_clean_1_agglow=ifelse(initiator_clean_1_agglow %in% lowlevelagg & !is.na(initiator_clean_1_agglow),initiator_clean_1_agglow, "uncategorized")) %>%
# mutate(initiator_clean_2_agglow=ifelse(initiator_clean_2_agglow %in% lowlevelagg & !is.na(initiator_clean_2_agglow),initiator_clean_2_agglow, "uncategorized")) %>%
# mutate(initiator_clean_3_agglow=ifelse(initiator_clean_3_agglow %in% lowlevelagg & !is.na(initiator_clean_3_agglow),initiator_clean_3_agglow, "uncategorized"))
# table(events$initiator_clean_1_agglow, useNA="always")
events[, c("initiator_clean_1_aggmed", "initiator_clean_2_aggmed", "initiator_clean_3_aggmed")] <-
events[, c("initiator_clean_1_agglow", "initiator_clean_2_agglow", "initiator_clean_3_agglow")]
events <- events %>%
mutate_at(
vars(starts_with("initiator_clean_1_aggmed|initiator_clean_2_aggmed|initiator_clean_3_aggmed")),
.funs = funs(forcats::fct_collapse(
'police'=c('cid','kenya police reserve','kenya police','police (generic)','railway police','special branch','tribal police','tribal police reserve') ,
'military'=c('arab combat units','asian combat units','british military','kings african rifles','kenya regiment','military (generic)','psuedo gangs','royal air force') ,
'civil authorities'=c('colonial authorities', 'tribal authorities')
))
)
events$initiator_clean_2_aggmed %>%
janitor::tabyl(sort = TRUE) %>%
janitor::adorn_crosstab(digits = 1)
events[, c("initiator_clean_1_agghigh", "initiator_clean_2_agghigh", "initiator_clean_3_agghigh")] <-
events[, c("initiator_clean_1_aggmed", "initiator_clean_2_aggmed", "initiator_clean_3_aggmed")]
events <- events %>%
mutate_at(
vars(starts_with("initiator_clean_1_agghigh|initiator_clean_2_agghigh|initiator_clean_3_agghigh")),
.funs = funs(forcats::fct_collapse(
'government'=c('civil authorities', 'home guard', 'military', 'police', 'paramilitary') ,
'rebels'=c('suspected insurgents')
))
)
events$initiator_clean_3_agghigh %>%
janitor::tabyl(sort = TRUE) %>%
janitor::adorn_crosstab(digits = 1)
regex <- "\\.|patrol|[1-9]\\s*rd|[1-9]\\s*th" # with regex start trying to get more of these to automatically map instead of generating lots of hand codings
events$target_clean <- events$initiator %>% stringr::str_trim() %>% tolower() %>% gsub(regex, "", .)
events <- events %>%
dplyr::select(-one_of("target_clean_1", "target_clean_2", "target_clean_3")) %>% # separate will continue to add columns every time its run so drop old versions. First time this is run will throw a warning.
tidyr ::separate(
col = initiator_clean,
into = c("target_clean_1", "target_clean_2", "target_clean_3"),
sep = "and|\\\\|/|\\&|,", remove = F, extra = "drop", fill = "right"
)
Unknown columns: `target_clean_1`, `target_clean_2`, `target_clean_3`
events <- events %>%
mutate_at(vars(starts_with("target_clean_")), funs(gsub(".*terror.*|.*erori.*|.*erroris*|.*mau mau.*|.*gang.*", "terrorist", ., ignore.case =T)) ) %>%
mutate_at(vars(starts_with("target_clean_")), funs(gsub(".*police.*", "police", ., ignore.case =T))) %>%
mutate_at(vars(starts_with("target_clean_")), funs(gsub(".*guard.*", "guard", ., ignore.case =T))) %>%
mutate_at(vars(starts_with("target_clean_")), funs(gsub(".*kpr.*|.*k p r.*", "kpr", ., ignore.case =T))) %>%
mutate_at(vars(starts_with("target_clean_")), funs(gsub(".*kar.*|.*k a r.*", "kar", ., ignore.case =T))) %>%
mutate_at(vars(starts_with("target_clean_")), funs(gsub(".*coy.*", "coy", ., ignore.case =T))) %>%
mutate_at(vars(starts_with("target_clean_")), funs(gsub(".*gsu.*", "gsu", ., ignore.case =T))) %>%
mutate_at(vars(starts_with("target_clean_")), funs(gsub(".*watch.*", "watch", ., ignore.case =T))) %>%
mutate_at(vars(starts_with("target_clean_")), funs(trimws(.)))
events$target_clean_1 %>%
janitor::tabyl(sort = TRUE) %>%
janitor::adorn_crosstab(digits = 1)
'janitor::adorn_crosstab' is deprecated.
Use 'use the various adorn_ functions instead. See the "tabyl" vignette for examples.' instead.
See help("Deprecated")
events <- events %>%
mutate(target_clean_1_agglow = do.call(forcats::fct_collapse, c(list(events$target_clean_1), initiator_target_master_clean)) %>% as.character() ) %>%
mutate(target_clean_2_agglow = do.call(forcats::fct_collapse, c(list(events$target_clean_2), initiator_target_master_clean)) %>% as.character()) %>%
mutate(target_clean_3_agglow = do.call(forcats::fct_collapse, c(list(events$target_clean_3), initiator_target_master_clean)) %>% as.character())
Unknown levels in `f`: ammunition, explosives, gelignite, firearms, arms, firearm, gun, pistol, rifle, ammunition, rifile, shotgun, verey pistol, other weapons, axe, scabbard, weapons, colonial authorities, councillor, district commissioner, district officer, government employees, port authority, public works department, colonial authorities, govtemployee, tribal authorities, elders, chief's sentry, private property, buildings, cattle dip, duka, farms, garage, homes, huts, hotel, land rover, lorry, market, office, oxcart, property, pump house, sawmill, shops, stores, tractor, vehicle, windmill, bullock's farm, cattle boma, coffe trees, coffee trees, cuthouse, dairy farm, dip, house, household, houses, hut, instrument, labour camp post, labour huts, lorries, lucerne sheds, maize shamba, milk factory, pig sty, private property, property of civilians, shop, store, thika fishing camp, vehicles, cash, funds, money, conductor's takings, banana, barley, bran, cabbage, coffee, corn, cream, crops, dairy, food, fruit, grain, honey, maize, meat, milk, oats, posho, potatoes, sugar, vegetable, wheat, food, food etc, food store, food stores, foodstuffs, fruits, grains, grains+cloth +money, green maize cobs, potato, potato store, potatos, skimmed milk, sugar cane, sugar maize, vegetables, vegitable garden, vegitables, wheat bags, wheat store, wheet, whisky, beast, cattle, cow, herd, livestock, pig, sheep, steer, stock, animal, bulls, calf, calves, chicken, cows, donkey, goat, goats, head of cattle, head of cow, head of sheep, heifer, heifers, lamb, live stock, livestock, livestocks, masai herd, milk cow, ox, ox cart, oxen, ram, red poll cattle, shee, sheep or ox, steers, stocks, medical supplies, medicine, m&b tablets, medicines, supplies, bags, bedding, blankets, books, charcoal, cloth, clothing, cooking utensils, cutlery, equipment, farm implements, household items, instruments, iron, pails, petrol, provisions, oil, sacks, supplies, tarpaulin, thatch, timber, tobacco, tools, uniforms, wire, wireless set, whiskey, articles, bag, battery, bucket, ciga, cigarettes, clothes, clothing etc, cloths, dairy item, dairy record book, goods, material, oil+tins, provisionv, railway uniforms, supplies, tarpaulian, typewriter, v- drive belts, gunny bags, church, airstrip, bridges, half built village, roads, trenches, water tank, bridge, bridge broken, bridge damaged, infrastructure, milt property, miltproperty, prison camp, stn damaged, school, school, school building, school house, school property, schools, embu guard, farm guard, forest guard, home guard, ikandine guard, kathanjure guard, kijabe guard, kikuyu guard, masai guard, meru guard, nandi guard, nkubu guard, stock guard, tigoni guard, tp and eg patrol, tp patrol, home guard patrol, m/g, m/g patrol, g, eg patrol, hg camp, hg leader, hg patrol, hg post, home, home guard, kg post, arab combat, arab combat units, asian combat, asian combat units, kings african rifles, 3 kar, 4 kar, 5 kar, 6 kar, 7 kar, 23 kar, 26 kar, k.a.r, k.p.r, k.a.r., 5th k.a.r, 5kar, 5 k.a.r, 4th kar, british military, devonshire regiment, field intelligence assistant, field intelligence officer, gloucestershire regiment, glosters, king's shropshire light infantry, royal east kent regiment, royal fusiliers, royal highland regiment, black watch, royal inniskilling fusiliers, royal northumberland fusiliers, rnf, police and military, army officer, british army officer, british military, buffs patrol, european officer, european soldiers, gloster patrol, captain folliott’s team, kenya regiment private, military (generic), captain, military property, platoon, non commissioned officers, patrol, sentrie, sgt white, psuedo gangs, pseudo gang, psuedo gangs, royal air force, paramilitary, kenya police, kp, kp constables' quarters, kpa, kenya police reserve, kpr officers, reserve police officer, police and k.p.r, police (generic), railway police, githumu police, masai special constable, tribal police, african constable, african costable, african special constable, tribal police, tribal police reserve, communities, manyatta, fishing camp, sublocation, village, camp, villages, detainees, prisoner, prisoners, suspected insurgents, bandits, food foragers, gangs, gang, kiama kia muingi, kkm, komerera, mau mau, oath administrator, passive wing, suspects, terrorists, terrorosts, terorist, oath administrater, oath administrators, passive wing members, resistance, suspect, suspected insurgents, terroist, terroists, terrost, africans, children, driver, employees, evangelist, family, farm boys, girls, informer, laborour, loyalist, mission staff, passengers, people, shopkeeper, students, teachers, vigilantes, women, workers, catholic misson staff, african staff, asian women, bus conductor, child, civilian(food carriers), civilian(schoolmaster), civilion, committee, committee member, courier, elder, embu tractor driver, employees of club, engine boy, girl, golf club staff, his own hut, hotel keeper, houseboy, illegal residents, indian, interpreter, kem, kikiyu, kikuyu assessor, kikuyu families, kikuyu houseboy, kikuyu labourer, kikyu, kirua village, labour line, labour lines, labourer, labourers, laboures, labourline, labours, males, man, maragoli, maragoli labourer, masai elders, masai tribesman, members of the thika committee, mna section leaders, municipal inspectors, non kikuyu employees, person, prostitutes, purke masai, pwd employee, railway employees, school master, school teacher, sisters committee, somali, staff, strangers, taxi drivers, teacher, treasurers, headman's son, gordon's labour, food carriersUnknown levels in `f`: ammunition, explosives, gelignite, firearms, arms, firearm, gun, pistol, rifle, ammunition, rifile, shotgun, verey pistol, other weapons, axe, scabbard, weapons, colonial authorities, councillor, district commissioner, district officer, forest ranger, game ranger, game warden, government employees, port authority, public works department, screening team, govrnment, wakamba screening team, do munuga, african do, dcmeru, colonial authorities, govtemployee, tribal authorities, elders, headman, chief chostram, chief eliud, chief's sentry, private property, buildings, cattle dip, duka, farms, garage, homes, huts, hotel, land rover, lorry, market, office, oxcart, property, pump house, sawmill, shops, stores, tractor, vehicle, windmill, bullock's farm, cattle boma, coffe trees, coffee trees, cuthouse, dairy farm, dip, house, household, houses, hut, instrument, labour camp post, labour huts, lorries, lucerne sheds, maize shamba, milk factory, pig sty, private property, property of civilians, shop, store, thika fishing camp, vehicles, cash, funds, money, conductor's takings, banana, barley, bran, cabbage, coffee, corn, cream, crops, dairy, food, fruit, grain, honey, maize, meat, milk, oats, posho, potatoes, sugar, vegetable, wheat, food, food etc, food store, food stores, foodstuffs, fruits, grains, grains+cloth +money, green maize cobs, potato, potato store, potatos, skimmed milk, sugar cane, sugar maize, vegetables, vegitable garden, vegitables, wheat bags, wheat store, wheet, whisky, beast, cattle, cow, herd, livestock, pig, sheep, steer, stock, animal, bulls, calf, calves, chicken, cows, donkey, goat, goats, head of cattle, head of cow, head of sheep, heifer, heifers, lamb, live stock, livestock, livestocks, masai herd, milk cow, ox, ox cart, oxen, ram, red poll cattle, shee, sheep or ox, steers, stocks, medical supplies, medicine, m&b tablets, medicines, supplies, bags, bedding, blankets, books, charcoal, cloth, clothing, cooking utensils, cutlery, equipment, farm implements, household items, instruments, iron, pails, petrol, provisions, oil, sacks, supplies, tarpaulin, thatch, timber, tobacco, tools, uniforms, wire, wireless set, whiskey, articles, bag, battery, bucket, ciga, cigarettes, clothes, clothing etc, cloths, dairy item, dairy record book, goods, material, oil+tins, provisionv, railway uniforms, supplies, tarpaulian, typewriter, v- drive belts, gunny bags, church, airstrip, bridges, half built village, roads, trenches, water tank, bridge, bridge broken, bridge damaged, infrastructure, milt property, miltproperty, prison camp, stn damaged, school, school, school building, school house, school property, schools, bg, embu guard, farm guard, forest guard, home guard, ikandine guard, kathanjure guard, kijabe guard, kikuyu guard, masai guard, meru guard, nandi guard, nkubu guard, stock guard, tigoni guard, tp and eg patrol, tp patrol, home guard patrol, m/g, m/g patrol, kathanjure hg, k g, ng, eg patrol, hg camp, hg leader, hg patrol, hg post, home, home guard, kg post, arab combat, arab combat unit, arab combat units, asian combat, asian combat team, second asian combat unit, asian combat units, kings african rifles, 3 kar, 4 kar, 5 kar, 6 kar, 7 kar, 23 kar, 26 kar, k.a.r, k.p.r, k.a.r., 5th k.a.r, 5kar, 5 k.a.r, 4th kar, british military, devonshire regiment, devons, field intelligence assistant, field intelligence officer, fio, gloucestershire regiment, glosters, lancashire fusiliers, king's shropshire light infantry, royal east kent regiment, royal fusiliers, royal highland regiment, black watch, royal inniskilling fusiliers, royal irish fusiliers, royal northumberland fusiliers, rnf, police and military, lancashire fusilliers, sp company 1 royal innisks, 1 rnf, rif, ksli, inniskillings, 1 glosters, 1 bw, 1 buffs, "a" company 1 royal innisks, "a" company, royal fusilers, of devons, of 1 glosters, lanc fus, fusiliers, fio kruger, fios, a co devon, 4 platoon support company, "c" company1 royal innisks, 6 platoonsp company 1 royal innisks, 1 lf, "c" company, "d" company, "a", "a" company bw, buffs ambush, d company, d' force, devens, c company, "d" force, army officer, british army officer, british military, buffs patrol, european officer, european soldiers, gloster patrol, captain folliott’s team, kenreg, kenregg, kenya regiment sergeant, keniya regiment, kenya regiment private, military (generic), captain, company, military property, platoon, security forces, security force, striking force, sentry, non commissioned officers, patrol, sentrie, sgt white, psuedo gangs, pseudo gang, pseudo team, trojan, psuedo gangs, trojan team, tracker group, pseudo teams, royal air force, raf, bombers, air strike, harvards, raf lincolns, flying squard, paramilitary, general service unit, cid, kenya police, kp constables' quarters, kpa, kenya police reserve, kpr officers, reserve police officer, rpo, rpos, police and k.p.r, police (generic), constable, polce, policy party, railway police, special branch, blue doctor team, special branch team, sb officers, githumu police, masai special constable, tribal police, tpeg, african constable, african costable, african special constable, tribal police, tribal police reserve, communities, manyatta, fishing camp, sublocation, village, camp, villages, detainees, prisoner, prisoners, suspected insurgents, bandits, food foragers, gangs, gang, kiama kia muingi, kkm, komerera, mau mau, oath administrator, passive wing, rebels, suspects, terrorists, terrorosts, gunman, terorist, gunmen, resistance group, resistance groups, oath administrater, oath administrators, passive wing members, resistance, suspect, suspected insurgents, terroist, terroists, terrost, africans, children, civilian, driver, employees, evangelist, family, farm boys, girls, informer, kikuyu, laborour, loyalist, masai, men, mission staff, owner, passengers, people, tugen tribesmen, stranger, sikh, herd boys, isiolo game scouts, farm labour, farmer, european, employer, employee, civilan, shopkeeper, students, teachers, turkana, vigilantes, women, workers, villagers, local labour, kikuyus, embu, tiriki houseboy, samburu, manager, woman, vetofficer, mrhiggins, masai party, kuria tribesmen, manager of akira estates, kuria tribesmen, chstephen, african, catholic misson staff, african staff, asian women, bus conductor, child, civilian(food carriers), civilian(schoolmaster), civilion, committee, committee member, courier, elder, embu tractor driver, employees of club, engine boy, girl, golf club staff, his own hut, hotel keeper, houseboy, illegal residents, indian, interpreter, kem, kikiyu, kikuyu assessor, kikuyu families, kikuyu houseboy, kikuyu labourer, kikyu, kirua village, labour line, labour lines, labourer, labourers, laboures, labourline, labours, males, man, maragoli, maragoli labourer, masai elders, masai tribesman, members of the thika committee, mna section leaders, municipal inspectors, non kikuyu employees, person, prostitutes, purke masai, pwd employee, railway employees, school master, school teacher, sisters committee, somali, staff, strangers, taxi drivers, teacher, treasurers, headman's son, norton traill's labour, gordon's labour, food carriersUnknown levels in `f`: ammunition, explosives, gelignite, firearms, arms, firearm, gun, pistol, rifle, ammunition, rifile, shotgun, verey pistol, other weapons, axe, scabbard, weapons, colonial authorities, councillor, district commissioner, district officer, forest ranger, game ranger, game warden, government, government employees, port authority, public works department, screening team, do, govrnment, wakamba screening team, do munuga, african do, dcmeru, colonial authorities, govtemployee, tribal authorities, chief, elders, headman, chief chostram, chief eliud, chief's sentry, private property, buildings, cattle dip, duka, farms, garage, homes, huts, hotel, land rover, lorry, market, office, oxcart, property, pump house, sawmill, shops, stores, tractor, vehicle, windmill, bullock's farm, cattle boma, coffe trees, coffee trees, cuthouse, dairy farm, dip, house, household, houses, hut, instrument, labour camp post, labour huts, lorries, lucerne sheds, maize shamba, milk factory, pig sty, private property, property of civilians, shop, store, thika fishing camp, vehicles, cash, funds, money, conductor's takings, banana, barley, bran, cabbage, coffee, corn, cream, crops, dairy, food, fruit, grain, honey, maize, meat, milk, oats, posho, potatoes, sugar, vegetable, wheat, food, food etc, food store, food stores, foodstuffs, fruits, grains, grains+cloth +money, green maize cobs, potato, potato store, potatos, skimmed milk, sugar cane, sugar maize, vegetables, vegitable garden, vegitables, wheat bags, wheat store, wheet, whisky, beast, cattle, cow, herd, livestock, pig, sheep, steer, stock, animal, bulls, calf, calves, chicken, cows, donkey, goat, goats, head of cattle, head of cow, head of sheep, heifer, heifers, lamb, live stock, livestock, livestocks, masai herd, milk cow, ox, ox cart, oxen, ram, red poll cattle, shee, sheep or ox, steers, stocks, medical supplies, medicine, m&b tablets, medicines, supplies, bags, bedding, blankets, books, charcoal, cloth, clothing, cooking utensils, cutlery, equipment, farm implements, household items, instruments, iron, pails, petrol, provisions, oil, sacks, supplies, tarpaulin, thatch, timber, tobacco, tools, uniforms, wire, wireless set, whiskey, articles, bag, battery, bucket, ciga, cigarettes, clothes, clothing etc, cloths, dairy item, dairy record book, goods, material, oil+tins, provisionv, railway uniforms, supplies, tarpaulian, typewriter, v- drive belts, gunny bags, church, airstrip, bridges, half built village, roads, trenches, water tank, bridge, bridge broken, bridge damaged, infrastructure, milt property, miltproperty, prison camp, stn damaged, school, school, school building, school house, school property, schools, bg, eg, embu guard, farm guard, forest guard, home guard, ikandine guard, kathanjure guard, kijabe guard, kikuyu guard, masai guard, meru guard, nandi guard, nkubu guard, stock guard, tigoni guard, tp and eg patrol, tp patrol, home guard patrol, m, m/g, m/g patrol, g, kathanjure hg, k g, ng, eg patrol, hg camp, hg leader, hg patrol, hg post, home, home guard, kg post, arab combat, arab combat unit, arab combat units, asian combat, asian combat unit, asian combat team, second asian combat unit, asian combat units, kings african rifles, 3 kar, 4 kar, 5 kar, 6 kar, 7 kar, 23 kar, 26 kar, k.a.r, k.p.r, k.a.r., 5th k.a.r, 5kar, 5 k.a.r, 4th kar, british military, devonshire regiment, devons, field intelligence assistant, field intelligence officer, fio, gloucestershire regiment, glosters, lancashire fusiliers, king's shropshire light infantry, royal east kent regiment, buffs, royal fusiliers, royal highland regiment, black watch, watch, royal inniskilling fusiliers, royal irish fusiliers, royal northumberland fusiliers, rnf, police and military, army, lancashire fusilliers, sp company 1 royal innisks, 1 rnf, rif, ksli, inniskillings, 1 glosters, 1 bw, 1 buffs, "a" company 1 royal innisks, "a" company, royal fusilers, of devons, of 1 glosters, lanc fus, fusiliers, fio kruger, fios, a co devon, 4 platoon support company, "c" company1 royal innisks, 6 platoonsp company 1 royal innisks, 1 lf, "c" company, "d" company, "a", "a" company bw, buffs ambush, d company, d' force, devens, c company, "d" force, army officer, british army officer, british military, buffs patrol, european officer, european soldiers, gloster patrol, kenya regiment, captain folliott’s team, kr, kenreg, kenregg, kenya regiment sergeant, kenya regt, keniya regiment, kenya regiment private, military (generic), captain, company, army, military property, platoon, security forces, security force, coy, striking force, sentry, non commissioned officers, patrol, sentrie, sgt white, psuedo gangs, pseudo gang, pseudo team, trojan, psuedo gangs, trojan team, tracker group, pseudo teams, royal air force, raf, bombers, air strike, harvards, raf lincolns, flying squard, paramilitary, general service unit, gsu, cid, kenya police, kp, kp constables' quarters, kpa, kenya police reserve, kpr officers, reserve police officer, rpo, rpos, police and k.p.r, police (generic), constable, polce, policy party, railway police, special branch, blue doctor team, special branch team, sb officers, githumu police, masai special constable, tribal police, tpeg, african constable, african costable, african special constable, tribal police, tribal police reserve, tpr, communities, manyatta, fishing camp, sublocation, village, camp, villages, detainees, prisoner, prisoners, suspected insurgents, bandits, food foragers, gangs, gang, kiama kia muingi, kkm, komerera, mau mau, oath administrator, passive wing, rebels, suspects, terrorists, terrorosts, terrorist, gunman, terorist, gunmen, resistance group, resistance groups, oath administrater, oath administrators, passive wing members, resistance, suspect, suspected insurgents, terroist, terroists, terrost, africans, children, civilian, driver, employees, evangelist, family, farm boys, girls, informer, laborour, loyalist, masai, men, mission staff, owner, passengers, people, tugen tribesmen, stranger, sikh, herd boys, isiolo game scouts, farm labour, farmer, european, employer, employee, civilan, shopkeeper, students, teachers, turkana, vigilantes, women, workers, villagers, labour, local labour, kikuyus, embu, tiriki houseboy, samburu, manager, woman, vetofficer, mrhiggins, masai party, kuria tribesmen, manager of akira estates, kuria tribesmen, chstephen, african, catholic misson staff, african staff, asian women, bus conductor, child, civilian(food carriers), civilian(schoolmaster), civilion, committee, committee member, courier, elder, embu tractor driver, employees of club, engine boy, girl, golf club staff, his own hut, hotel keeper, houseboy, illegal residents, indian, interpreter, kem, kikiyu, kikuyu assessor, kikuyu families, kikuyu houseboy, kikuyu labourer, kikyu, kirua village, labour line, labour lines, labourer, labourers, laboures, labourline, labours, males, man, maragoli, maragoli labourer, masai elders, masai tribesman, members of the thika committee, mna section leaders, municipal inspectors, non kikuyu employees, person, prostitutes, purke masai, pwd employee, railway employees, school master, school teacher, sisters committee, somali, staff, strangers, taxi drivers, teacher, treasurers, headman's son, norton traill's labour, gordon's labour, food carriers
lowlevelagg <- c(
"church", "kenya police", "medicine", "tribal police reserve", "detainees", "kenya regiment", "other weapons",
"paramilitary", "ammunition", "communities", "british military", "military (generic)", "tribal authorities", "kenya police reserve", "tribal police",
"Kings African Rifles", "infrastructure", "school", "cash", "colonial authorities", "police (generic)", "supplies", "firearms", "food", "private property",
"home guard", "civilians", "livestock", "suspected insurgents"
)
#events <- events %>%
# mutate(target_clean_1_agglow=ifelse(target_clean_1_agglow %in% lowlevelagg & !is.na(target_clean_1_agglow),target_clean_1_agglow, "uncategorized")) %>%
# mutate(target_clean_2_agglow=ifelse(target_clean_2_agglow %in% lowlevelagg & !is.na(target_clean_2_agglow),target_clean_2_agglow, "uncategorized")) %>%
# mutate(target_clean_3_agglow=ifelse(target_clean_3_agglow %in% lowlevelagg & !is.na(target_clean_3_agglow),target_clean_3_agglow, "uncategorized"))
events$target_clean_1_agglow %>%
janitor::tabyl(sort = TRUE) %>%
janitor::adorn_crosstab(digits = 1)
'janitor::adorn_crosstab' is deprecated.
Use 'use the various adorn_ functions instead. See the "tabyl" vignette for examples.' instead.
See help("Deprecated")
events[, c("target_clean_1_aggmed", "target_clean_2_aggmed", "target_clean_3_aggmed")] <-
events[, c("target_clean_1_agglow", "target_clean_2_agglow", "target_clean_3_agglow")]
events <- events %>%
mutate_at(
vars(starts_with("initiator_clean_1_aggmed|initiator_clean_2_aggmed|initiator_clean_3_aggmed")),
.funs = funs(forcats::fct_collapse(temp,
'police'=c('cid','kenya police reserve','kenya police','police (generic)','railway police','special branch','tribal police','tribal police reserve') ,
'military'=c('arab combat units','asian combat units','british military','Kings African Rifles','kenya regiment','military (generic)','psuedo gangs','royal air force'),
'civil authorities'=c('colonial authorities', 'tribal authorities'),
'armaments'=c('ammunition','firearms','other weapons'),
'provisions'=c('cash','food','livestock','medicine','supplies'),
'public buildings'=c('church','school','infrastructure')
))
)
events$initiator_clean_1_aggmed %>%
janitor::tabyl(sort = TRUE) %>%
janitor::adorn_crosstab(digits = 1)
'janitor::adorn_crosstab' is deprecated.
Use 'use the various adorn_ functions instead. See the "tabyl" vignette for examples.' instead.
See help("Deprecated")
events[, c("target_clean_1_agghigh", "target_clean_2_agghigh", "target_clean_3_agghigh")] <-
events[, c("target_clean_1_aggmed", "target_clean_2_aggmed", "target_clean_3_aggmed")]
events <- events %>%
mutate_at(
vars(starts_with("target_clean_1_agghigh|target_clean_2_agghigh|target_clean_3_agghigh")),
.funs = funs(forcats::fct_collapse(
'government'=c('civil authorities', 'home guard', 'military', 'police', 'paramilitary') ,
'rebels'=c('suspected insurgents','detainees') ,
'property'=c('armaments','private property','provisions','public buildings') ,
'civilians'=c('communities','communities')
))
)
events$target_clean_1_agghigh %>%
janitor::tabyl(sort = TRUE) %>%
janitor::adorn_crosstab(digits = 1)
# These numbers are estimates and can be changed
# These columns are dirty and when an incoherent value is given, it is set to missing.
recode_counts <- function(indata){
acouple <- "2"
afew <- "3"
agang <- "6"
agang_large <- "12"
indata %>% tolower() %>% dplyr::recode(
"100+"= "100",
"??"= "",
"1 bag"= "1",
"1 blanket"= "1",
"1 burnt down"= "1",
"1 civilian"= "1",
"1 cow, 6 sheep"= "7",
"1 cow"= "1",
"1 goat, clothing"= "1",
"1 goat"= "1",
"1 looted"= "1",
"1 looted"= "1",
"1 ox"= "1",
"1 sheep and chickens"= "1",
"1 sheep, some chickens"= "1",
"1 sheep"= "1",
"1 shotgun ,30 rounds"= "31",
"1 shotgun + 10rds"= "11",
"1 steer"= "1",
"1 village, 1 market"= "1",
"1 wounded"= "1",
"1 wrecked"= "1",
"1+"= "1",
"1+3"= "4",
"1+some"= "1",
"10 acres"= "10",
"10 bags"= "10",
"10 cattle"= "10",
"10 sacks"= "10",
"10 to 12"= "11",
"10 to 15"= "13",
"10/14/2013"= "",
"10/15/2013"= "",
"10/20/2013"= "",
"100 lb"= "100",
"100-130"= "115",
"100-150"= "125",
"100+"= "100",
"10000"= "",
"109 cattle"= "109",
"10bags potatoes"= "10",
"11 cattle"= "11",
"11 sheep"= "11",
"112 bore & 20.1.45 &7 rds"= "112",
"12 bags"= "12",
"12 cattle"= "12",
"12 goats"= "12",
"12 to 15"= "13",
"12 to 20"= "17",
"12/14/2013"= "",
"120 cattle"= "120",
"120+1"= "121",
"13 sheep"= "13",
"13-15"= "14",
"1300 worth"= "1300",
"14 cattle"= "14",
"14 goats"= "14",
"14 head"= "14",
"14+"= "14",
"15 - 20"= "18",
"15 cattle"= "15",
"15 to 20"= "17",
"15 to 20"= "17",
"15 to 25"= "20",
"15-20"= "17",
"15+"= "15",
"150-200"= "175",
"150+"= "150",
"151 cattle"= "151",
"17 cattle"= "17",
"172 bags burnt"= "172",
"18 cattle"= "18",
"19 bags"= "19",
"196 rounds"= "196",
"2 bags maize"= "2",
"2 bags"= "2",
"2 bags"= "2",
"2 buckets"= "2",
"2 cattle hamstrung"= "2",
"2 cattle, corn"= "3",
"2 cattle"= "2",
"2 cows"= "2",
"2 debbies"= "2",
"2 goats"= "2",
"2 groups"= "2",
"2 huts burnt"= "2",
"2 sheep"= "2",
"2 watches, cash"= "2",
"2/3/2013"= "",
"2+"= "2",
"20 bags maize, 9 goats, 32 chickens and ducks, cash"= "60",
"20 bags"= "20",
"20 cattle"= "20",
"20 goats"= "20",
"20 sheep"= "20",
"20 to 25"= "23",
"20 to 30"= "25",
"20 to 40"= "30",
"20-25"= "23",
"20-30"= "25",
"20-35"= "30",
"20-50"= "35",
"20/30"= "25",
"20/30"= "25",
"20+"= "20",
"200 yds"= "200",
"200-300"= "250",
"200+"= "200",
"2000 acres"= "2000",
"21 goats"= "21",
"21 head"= "21",
"22 cattle"= "22",
"25 to 30"= "28",
"25-30"= "27",
"25-30"= "27",
"28 killed"= "28",
"28 sheep"= "28",
"3 bags"= "3",
"3 bags"= "3",
"3 bikes"= "3",
"3 cattle"= "3",
"3 cattle"= "3",
"3 goats"= "3",
"3 or 4"= "3",
"3 or 4"= "3",
"3 pangas"= "3",
"3 sheep, 2 calves"= "5",
"3 sheep"= "3",
"3 to 4"= "3",
"3 to 4"= "3",
"3/10/2013"= "",
"3/4/2013"= "",
"3/5/2013"= "",
"3/6/2013"= "",
"3+"= "3",
"3+3+1+2"= "9",
"3+some"= "3",
"30 acres"= "30",
"30 cattle"= "30",
"30 to 40"= "35",
"30-35"= "33",
"30-40"= "35",
"30-50"= "40",
"30+"= "30",
"300-400"= "350",
"300+"= "300",
"35 bags"= "35",
"35 to 40"= "37",
"38 cattle"= "38",
"3or 4"= "3",
"4 bags potatoes"= "4",
"4 bags"= "4",
"4 goats"= "4",
"4 groups"= "",
"4 or 5"= "4",
"4 oxen"= "4",
"4 sheep"= "4",
"4 to 8"= "6",
"4/6/2013"= "",
"40 bag"= "40",
"40 cattle"= "40",
"40 sacks"= "40",
"40 sheep"= "40",
"40 to 50"= "45",
"40/50"= "45",
"40-50"= "45",
"400 cattle"= "400",
"4000"= "",
"40161"="",
"44 cattle"= "44",
"5 bags"= "5",
"5 calves"= "5",
"5 cattle"= "5",
"5 destroyed"= "5",
"5 goats"= "5",
"5 killed"= "5",
"5 or 6"= "5",
"5 sheep, 1 ox"= "6",
"5 sheep"= "5",
"5 to 6"= "5",
"5/10/2013"= "",
"5/6/2013"= "",
"50 cattle"= "50",
"50 to 60"= "55",
"50-100"= "75",
"50-60"= "55",
"50-75"= "62",
"50+"= "50",
"50+"= "50",
"5000 acres"= "5000",
"519 +"= "519",
"53 detained"= "53",
"54 sheep and goats"= "54",
"56 committee members"= "56",
"6 bag"= "6",
"6 bags"= "6",
"6 cattle"= "6",
"6 cattle"= "6",
"6 goats"= "6",
"6 or 7"= "6",
"6 sheep and goats"= "6",
"6 sheep"= "6",
"6 to 7"= "6",
"6 to 8"= "7",
"6 to 9"= "8",
"6-8 man"= "7",
"6/10/2013"= "",
"6/8/2013"= "",
"60-100"= "80",
"60-70"= "65",
"64 cattle"= "64",
"7 bags"= "7",
"7 cattle"= "7",
"7 sheep"= "7",
"7/10/2013"= "",
"70 bags"= "70",
"70 cattle, sheep"= "70",
"70-100"= "85",
"70000"= "",
"75 rounds"= "75",
"8 bags potatoes"= "8",
"8 cattle"= "8",
"8 cows slashed"= "8",
"8 cows"= "8",
"8 sheep"= "8",
"8 to 10"= "9",
"8/10/2013"= "",
"80 cattle"= "80",
"80-100"= "90",
"84 sheep, 1 cow, 5 chickens"= "90",
"9 cattle"= "9",
"9 sheep"= "9",
"9 to 10"= "9",
"9+9"= "18",
"900(not clear)"= "900",
"all locals"= "",
"all"= "",
"app 5"= "5",
"app. 100"= "100",
"app. 120"= "120",
"armed gang"= agang,
"band"= agang,
"bands"= "",
"cattle slashing"= "",
"clothing"= "",
"considerable quantity"= "",
"fairly large gang"= agang_large,
"few bags"= "",
"few"= "",
"food"= "",
"gang"= agang,
"gangs"= agang_large,
"guards"= afew,
"half village"= "",
"labour"= "",
"large crowd"= "",
"large force"= agang_large,
"large gang"= agang_large,
"large meeting"= "",
"large number"= "",
"large numbers"= "",
"large quantities"= "",
"large quantity"= "",
"large re-oathing ceremony"= "",
"large scale"= "",
"large"= agang_large,
"largish gang"= agang_large,
"local populace"= "",
"many thousand"= "2000",
"mob"= "",
"not given"= "",
"number"= "",
"occupants"= "",
"over 200"= "200",
"Party"= agang,
"party"= agang,
"patrol"= agang,
"posho"= "",
"potatoes"= "",
"quantity of clothing"= "",
"section"= "",
"several gangs"= agang_large,
"several"= "3",
"sheep and goats"= "",
"shs 2,300/-"= "2300",
"shs 60/-"= "60",
"shs. 1,000"= "1000",
"shs. 18"= "18",
"shs. 30"= "30",
"small gang"= agang,
"small gangs"= agang,
"small group"= agang,
"small party"= afew,
"small"= agang,
"some"= afew,
"sufficient food"= "",
"unknown"= "",
"very large gang"= agang_large,
"villages in ndia, gichugu, embu divisions"= "",
"wives"= ""
) %>% as.numeric() %>% return()
}
events$initiator_numbers_numeric <- events$initiator_numbers %>% recode_counts()
NAs introduced by coercion
events$target_numbers_numeric <- events$target_numbers %>% recode_counts()
NAs introduced by coercion
events$affected_count_numeric <- events$affected_count %>% recode_counts()
NAs introduced by coercion
events[, c(
"government_killed_clean", "government_wounded_clean", "government_captured_clean",
"rebels_killed_clean", "rebels_wounded_clean", "rebels_captured_clean",
"civilians_killed_clean", "civilians_wounded_clean", "civilians_captured_clean"
)] <-
events[, c(
"government_killed", "government_wounded", "government_captured",
"rebels_killed", "rebels_wounded", "rebels_captured",
"civilians_killed", "civilians_wounded", "civilians_captured"
)]
events <- events %>% mutate_at(
.vars = c(
"government_killed_clean", "government_wounded_clean", "government_captured_clean",
"rebels_killed_clean", "rebels_wounded_clean", "rebels_captured_clean",
"civilians_killed_clean", "civilians_wounded_clean", "civilians_captured_clean"
),
funs(as.numeric(forcats::fct_collapse(.,
'1'=c('unKnown','unknown','UnKnown','UNKNOWN','Unkown','Unknown','Number','More','101','146','122','208','94'),
'2'=c('Few','others','Few','some'),
'3'=c('Many','Sevaral','several','Several More','Several others','Some','Council of elders','Council of war','Several','Majority','many','Gang','Several','Small gang','3+'),
'100'='100+',
'23'='23 Families',
'28'='28 families',
'35'='30-40',
'50'='50+',
'45'='4500',
'80'='800',
'6'='6+' ,
'10'='10+' ,
'10197'=NA,
'7'='48') %>%
fct_explicit_na(na_level='0')
)
)
)
Unknown levels in f
: unKnown, unknown, UnKnown, UNKNOWN, Unkown, Unknown, Number, More, 146, 122, 208, 94, Few, others, Few, Many, Sevaral, several, Several More, Several others, Council of elders, Council of war, Several, Majority, many, Gang, Several, Small gang, 3+, 100+, 23 Families, 28 families, 30-40, 50+, 4500, 800, 6+, 10+, NA, 48Unknown levels in f
: unKnown, unknown, UnKnown, UNKNOWN, Unkown, Unknown, Number, More, 101, 146, 122, 208, 94, Few, others, Few, some, Many, Sevaral, several, Several More, Several others, Some, Council of elders, Council of war, Several, Majority, many, Gang, Several, Small gang, 3+, 100+, 23 Families, 28 families, 30-40, 50+, 4500, 800, 6+, 10+, NA, 48Unknown levels in f
: unKnown, unknown, UnKnown, UNKNOWN, Unkown, Unknown, Number, More, 101, 146, 122, 208, 94, Few, others, Few, some, Many, Sevaral, several, Several More, Several others, Some, Council of elders, Council of war, Several, Majority, many, Gang, Several, Small gang, 3+, 100+, 23 Families, 28 families, 30-40, 50+, 4500, 800, 6+, 10+, NA, 48Unknown levels in f
: unKnown, unknown, UnKnown, UNKNOWN, Number, More, 101, 146, 122, 208, 94, Few, others, Few, some, Sevaral, several, Several More, Several others, Council of elders, Council of war, Majority, many, Gang, Small gang, 3+, 100+, 23 Families, 28 families, 30-40, 50+, 4500, 800, 6+, 10+, NAUnknown levels in f
: unKnown, unknown, UnKnown, UNKNOWN, Unkown, Unknown, Number, 101, 146, 122, 208, some, Council of elders, Council of war, Majority, many, Gang, Small gang, 3+, 100+, 23 Families, 28 families, 30-40, 50+, 4500, 800, 10+, NAUnknown levels in f
: More, 101, 146, 122, 208, 94, others, Many, Sevaral, Several More, Several others, 6+, NAUnknown levels in f
: unKnown, unknown, UnKnown, UNKNOWN, Unkown, Unknown, Number, More, 101, 146, 122, 208, 94, Few, others, Few, some, Many, Sevaral, several, Several More, Several others, Council of elders, Council of war, Several, Majority, many, Gang, Several, Small gang, 3+, 100+, 23 Families, 28 families, 30-40, 50+, 4500, 800, 6+, 10+, NA, 48Unknown levels in f
: unKnown, unknown, UnKnown, UNKNOWN, Unkown, Unknown, Number, More, 101, 146, 122, 208, 94, Few, others, Few, some, Many, Sevaral, several, Several More, Several others, Some, Council of elders, Council of war, Several, Majority, many, Gang, Several, Small gang, 3+, 100+, 23 Families, 28 families, 30-40, 50+, 4500, 800, 6+, 10+, NA, 48Unknown levels in f
: unKnown, unknown, UnKnown, UNKNOWN, Unkown, Number, More, 101, 146, 122, 208, 94, Few, others, Few, some, Sevaral, Several More, Several others, Some, Council of elders, Council of war, Majority, many, Gang, Small gang, 3+, 100+, 23 Families, 28 families, 30-40, 50+, 4500, 800, 6+, 10+, NA, 48
events <- events %>% mutate_at(.vars = c(
"government_killed_clean", "government_wounded_clean", "government_captured_clean",
"rebels_killed_clean", "rebels_wounded_clean", "rebels_captured_clean",
"civilians_killed_clean", "civilians_wounded_clean", "civilians_captured_clean"
), funs(as.numeric))
events <- events %>%
mutate(rebels_killedwounded_clean = rebels_killed_clean + rebels_wounded_clean) %>%
mutate(government_killed_wounded_clean = government_killed_clean + government_wounded_clean) %>%
mutate(rebels_government_killedwounded_clean = rebels_killed_clean + rebels_wounded_clean) %>%
mutate(rebels_government_killed_clean = rebels_killed_clean + government_killed_clean) %>%
mutate(rebels_government_civilians_killed_clean = rebels_killed_clean + government_killed_clean + civilians_killed_clean)
events %>% janitor::crosstab(initiator_clean_1_agghigh, type_clean_agghigh) %>% janitor::adorn_crosstab(digits = 1)
'janitor::crosstab' is deprecated.
Use 'tabyl(dat, var1, var2, ...)' instead.
See help("Deprecated")Factor `type_clean_agghigh` contains implicit NA, consider using `forcats::fct_explicit_na`'janitor::adorn_crosstab' is deprecated.
Use 'use the various adorn_ functions instead. See the "tabyl" vignette for examples.' instead.
See help("Deprecated")
events %>% janitor::crosstab(target_clean_1_agghigh, type_clean_agghigh) %>% janitor::adorn_crosstab(digits = 1)
'janitor::crosstab' is deprecated.
Use 'tabyl(dat, var1, var2, ...)' instead.
See help("Deprecated")Factor `type_clean_agghigh` contains implicit NA, consider using `forcats::fct_explicit_na`'janitor::adorn_crosstab' is deprecated.
Use 'use the various adorn_ functions instead. See the "tabyl" vignette for examples.' instead.
See help("Deprecated")
events %>% janitor::crosstab(target_clean_1_agghigh, initiator_clean_1_agghigh) %>% janitor::adorn_crosstab(digits = 1)
'janitor::crosstab' is deprecated.
Use 'tabyl(dat, var1, var2, ...)' instead.
See help("Deprecated")'janitor::adorn_crosstab' is deprecated.
Use 'use the various adorn_ functions instead. See the "tabyl" vignette for examples.' instead.
See help("Deprecated")
cat("\014")
#Cases to handle
#"928141"
#"311449 328445 338443"
#"EASTING 30 and 27"
#"EastLeigh Sect.7"
#"FARM 535/4"
#"HAC 0202"
#"HAC.577236"
#"HZN 974641 & HZN 974651"
#"HZJ. 8595"
#"HZJ. 465765, HZJ. 459771, HZJ. 451756"
#"HZJ 42765, HZJ 42375and HZJ 429761"
#"HZH 960610, HZH 960630, HZH 977538"
#"H.Z.R. 4786"
#"HAD 1708, HAD 1709"
#"HAD 3326/3327"
#"HZJ 42765, HZJ 42375and HZJ 429761"
#"HZJ 9518 9617"
#"HZP 7430, HZP 9029, HZP 6448, HZP 7252, HZP 9448"
events$map_coordinate %>% janitor::tabyl()
. n percent valid_percent
?? 1 9.552011e-05 0.0001717918
1182 1 9.552011e-05 0.0001717918
1200 1 9.552011e-05 0.0001717918
12053 1 9.552011e-05 0.0001717918
12402 1 9.552011e-05 0.0001717918
1280 1 9.552011e-05 0.0001717918
1281 1 9.552011e-05 0.0001717918
1663 1 9.552011e-05 0.0001717918
184157 1 9.552011e-05 0.0001717918
1870 2 1.910402e-04 0.0003435836
1914 1 9.552011e-05 0.0001717918
2113 1 9.552011e-05 0.0001717918
2122 1 9.552011e-05 0.0001717918
214123 1 9.552011e-05 0.0001717918
223348 1 9.552011e-05 0.0001717918
2287 1 9.552011e-05 0.0001717918
2314 1 9.552011e-05 0.0001717918
2396 1 9.552011e-05 0.0001717918
2405 1 9.552011e-05 0.0001717918
2407 1 9.552011e-05 0.0001717918
2410 1 9.552011e-05 0.0001717918
2555 1 9.552011e-05 0.0001717918
2575 1 9.552011e-05 0.0001717918
2608 3 2.865603e-04 0.0005153754
2619 1 9.552011e-05 0.0001717918
270527 1 9.552011e-05 0.0001717918
2722 1 9.552011e-05 0.0001717918
2727 1 9.552011e-05 0.0001717918
2753 1 9.552011e-05 0.0001717918
2832 1 9.552011e-05 0.0001717918
2849 1 9.552011e-05 0.0001717918
2854 1 9.552011e-05 0.0001717918
2862 1 9.552011e-05 0.0001717918
2863 1 9.552011e-05 0.0001717918
287428 1 9.552011e-05 0.0001717918
2904 1 9.552011e-05 0.0001717918
2923 1 9.552011e-05 0.0001717918
2924 1 9.552011e-05 0.0001717918
2932 1 9.552011e-05 0.0001717918
29774 1 9.552011e-05 0.0001717918
3021 1 9.552011e-05 0.0001717918
3025 2 1.910402e-04 0.0003435836
3045 3545 3540 3141 1 9.552011e-05 0.0001717918
310189 1 9.552011e-05 0.0001717918
311449 328445 338443 1 9.552011e-05 0.0001717918
315505 1 9.552011e-05 0.0001717918
3210 1 9.552011e-05 0.0001717918
3257 1 9.552011e-05 0.0001717918
3342 1 9.552011e-05 0.0001717918
3344 1 9.552011e-05 0.0001717918
3351 1 9.552011e-05 0.0001717918
3357 2 1.910402e-04 0.0003435836
341088 1 9.552011e-05 0.0001717918
3555 1 9.552011e-05 0.0001717918
36334 1 9.552011e-05 0.0001717918
3705 1 9.552011e-05 0.0001717918
3709 1 9.552011e-05 0.0001717918
3727 1 9.552011e-05 0.0001717918
3730 1 9.552011e-05 0.0001717918
3743 1 9.552011e-05 0.0001717918
375197 1 9.552011e-05 0.0001717918
3761 2 1.910402e-04 0.0003435836
3833 2 1.910402e-04 0.0003435836
3849 1 9.552011e-05 0.0001717918
3854 1 9.552011e-05 0.0001717918
4021 1 9.552011e-05 0.0001717918
4030 1 9.552011e-05 0.0001717918
4052 1 9.552011e-05 0.0001717918
4126 1 9.552011e-05 0.0001717918
4152 3 2.865603e-04 0.0005153754
4467 1 9.552011e-05 0.0001717918
4646 1 9.552011e-05 0.0001717918
4872 5271 1 9.552011e-05 0.0001717918
5030 1 9.552011e-05 0.0001717918
506413 1 9.552011e-05 0.0001717918
511558 1 9.552011e-05 0.0001717918
5152 1 9.552011e-05 0.0001717918
5155 1 9.552011e-05 0.0001717918
5246 1 9.552011e-05 0.0001717918
5249 1 9.552011e-05 0.0001717918
5317 1 9.552011e-05 0.0001717918
5448 1 9.552011e-05 0.0001717918
552197 1 9.552011e-05 0.0001717918
5627 1 9.552011e-05 0.0001717918
563 1 9.552011e-05 0.0001717918
5720 1 9.552011e-05 0.0001717918
587361 1 9.552011e-05 0.0001717918
600570 1 9.552011e-05 0.0001717918
6020 1 9.552011e-05 0.0001717918
6050 2 1.910402e-04 0.0003435836
610340 1 9.552011e-05 0.0001717918
6154 2 1.910402e-04 0.0003435836
6225 2 1.910402e-04 0.0003435836
63343 1 9.552011e-05 0.0001717918
6424 1 9.552011e-05 0.0001717918
644526 1 9.552011e-05 0.0001717918
6452 2 1.910402e-04 0.0003435836
6638 1 9.552011e-05 0.0001717918
6640 1 9.552011e-05 0.0001717918
6654 1 9.552011e-05 0.0001717918
6740 1 9.552011e-05 0.0001717918
677 1 9.552011e-05 0.0001717918
6825 1 9.552011e-05 0.0001717918
6826 1 9.552011e-05 0.0001717918
7039 1 9.552011e-05 0.0001717918
7050 1 9.552011e-05 0.0001717918
7196 1 9.552011e-05 0.0001717918
7247 2 1.910402e-04 0.0003435836
7248 1 9.552011e-05 0.0001717918
7397 1 9.552011e-05 0.0001717918
7552 1 9.552011e-05 0.0001717918
757 1 9.552011e-05 0.0001717918
7570 1 9.552011e-05 0.0001717918
7754 2 1.910402e-04 0.0003435836
8134 1 9.552011e-05 0.0001717918
8153 1 9.552011e-05 0.0001717918
8345 1 9.552011e-05 0.0001717918
8351 1 9.552011e-05 0.0001717918
85154 1 9.552011e-05 0.0001717918
8516 1 9.552011e-05 0.0001717918
8556 1 9.552011e-05 0.0001717918
878560 1 9.552011e-05 0.0001717918
9245 2 1.910402e-04 0.0003435836
928141 1 9.552011e-05 0.0001717918
9649 1 9.552011e-05 0.0001717918
EASTING 30 and 27 1 9.552011e-05 0.0001717918
EastLeigh Sect.7 1 9.552011e-05 0.0001717918
FARM 535/4 1 9.552011e-05 0.0001717918
GZM 6590 1 9.552011e-05 0.0001717918
H?? 4397 1 9.552011e-05 0.0001717918
H.D.597254 1 9.552011e-05 0.0001717918
H.Z.R. 4786 2 1.910402e-04 0.0003435836
HAA 8106 1 9.552011e-05 0.0001717918
HAB 7310 2 1.910402e-04 0.0003435836
HAB 0535 1 9.552011e-05 0.0001717918
HAB 0578 1 9.552011e-05 0.0001717918
HAB 1316 2 1.910402e-04 0.0003435836
HAB 2106 1 9.552011e-05 0.0001717918
HAB 2244 1 9.552011e-05 0.0001717918
HAB 5640 1 9.552011e-05 0.0001717918
HAB 5806 4 3.820804e-04 0.0006871672
HAB 5830 1 9.552011e-05 0.0001717918
HAB 6232 2 1.910402e-04 0.0003435836
HAB 6234 1 9.552011e-05 0.0001717918
HAB 6305 1 9.552011e-05 0.0001717918
HAB 6401 1 9.552011e-05 0.0001717918
HAB 6403 1 9.552011e-05 0.0001717918
HAB 6405 1 9.552011e-05 0.0001717918
HAB 6505 5 4.776005e-04 0.0008589589
HAB 6507 1 9.552011e-05 0.0001717918
HAB 6609 2 1.910402e-04 0.0003435836
HAB 6630 1 9.552011e-05 0.0001717918
HAB 6639 2 1.910402e-04 0.0003435836
HAB 670100 1 9.552011e-05 0.0001717918
HAB 6707 3 2.865603e-04 0.0005153754
HAB 6709 1 9.552011e-05 0.0001717918
HAB 6710 1 9.552011e-05 0.0001717918
HAB 6734 3 2.865603e-04 0.0005153754
HAB 6788 1 9.552011e-05 0.0001717918
HAB 6804 2 1.910402e-04 0.0003435836
HAB 6807 2 1.910402e-04 0.0003435836
HAB 6811 1 9.552011e-05 0.0001717918
HAB 6818 3 2.865603e-04 0.0005153754
HAB 6829 1 9.552011e-05 0.0001717918
HAB 6841 2 1.910402e-04 0.0003435836
HAB 6904 2 1.910402e-04 0.0003435836
HAB 6908 1 9.552011e-05 0.0001717918
HAB 6913 1 9.552011e-05 0.0001717918
HAB 6921 4 3.820804e-04 0.0006871672
HAB 6941 1 9.552011e-05 0.0001717918
HAB 7001 1 9.552011e-05 0.0001717918
HAB 7002 1 9.552011e-05 0.0001717918
HAB 7013 1 9.552011e-05 0.0001717918
HAB 7016 1 9.552011e-05 0.0001717918
HAB 7024 1 9.552011e-05 0.0001717918
HAB 7040 1 9.552011e-05 0.0001717918
HAB 7045 1 9.552011e-05 0.0001717918
HAB 7102 1 9.552011e-05 0.0001717918
HAB 7107 1 9.552011e-05 0.0001717918
HAB 7135 1 9.552011e-05 0.0001717918
HAB 7145 1 9.552011e-05 0.0001717918
HAB 7200 2 1.910402e-04 0.0003435836
HAB 7208 1 9.552011e-05 0.0001717918
HAB 7211 1 9.552011e-05 0.0001717918
HAB 7214 2 1.910402e-04 0.0003435836
HAB 7216 1 9.552011e-05 0.0001717918
HAB 7231 11 1.050721e-03 0.0018897097
HAB 7302 1 9.552011e-05 0.0001717918
HAB 7305 1 9.552011e-05 0.0001717918
HAB 7308 1 9.552011e-05 0.0001717918
HAB 7315 2 1.910402e-04 0.0003435836
HAB 7316 1 9.552011e-05 0.0001717918
HAB 7405 1 9.552011e-05 0.0001717918
HAB 7413 2 1.910402e-04 0.0003435836
HAB 7419 1 9.552011e-05 0.0001717918
HAB 7501 3 2.865603e-04 0.0005153754
HAB 7511 1 9.552011e-05 0.0001717918
HAB 7519 1 9.552011e-05 0.0001717918
HAB 7526 1 9.552011e-05 0.0001717918
HAB 753068 1 9.552011e-05 0.0001717918
HAB 7605 1 9.552011e-05 0.0001717918
HAB 7611 1 9.552011e-05 0.0001717918
HAB 7624 4 3.820804e-04 0.0006871672
HAB 7630 1 9.552011e-05 0.0001717918
HAB 7655 1 9.552011e-05 0.0001717918
HAB 7702 1 9.552011e-05 0.0001717918
HAB 7712 1 9.552011e-05 0.0001717918
HAB 7801 4 3.820804e-04 0.0006871672
HAB 7803 1 9.552011e-05 0.0001717918
HAB 7807 1 9.552011e-05 0.0001717918
HAB 7810 1 9.552011e-05 0.0001717918
HAB 7813 1 9.552011e-05 0.0001717918
HAB 7815 5 4.776005e-04 0.0008589589
HAB 7852 2 1.910402e-04 0.0003435836
HAB 7912 1 9.552011e-05 0.0001717918
HAB 7933 3 2.865603e-04 0.0005153754
HAB 8000 1 9.552011e-05 0.0001717918
HAB 8002 1 9.552011e-05 0.0001717918
HAB 8003 1 9.552011e-05 0.0001717918
HAB 8010 9 8.596810e-04 0.0015461261
HAB 8017 1 9.552011e-05 0.0001717918
HAB 8027 1 9.552011e-05 0.0001717918
HAB 8125 10 9.552011e-04 0.0017179179
HAB 8132 1 9.552011e-05 0.0001717918
HAB 8200 1 9.552011e-05 0.0001717918
HAB 8226 1 9.552011e-05 0.0001717918
HAB 8300 4 3.820804e-04 0.0006871672
HAB 8303 1 9.552011e-05 0.0001717918
HAB 8321 1 9.552011e-05 0.0001717918
HAB 8409 3 2.865603e-04 0.0005153754
HAB 8423 1 9.552011e-05 0.0001717918
HAB 8509 1 9.552011e-05 0.0001717918
HAB 8524 1 9.552011e-05 0.0001717918
HAB 8604 1 9.552011e-05 0.0001717918
HAB 8605 4 3.820804e-04 0.0006871672
HAB 8606 3 2.865603e-04 0.0005153754
HAB 8710 3 2.865603e-04 0.0005153754
HAB 8714 2 1.910402e-04 0.0003435836
HAB 8803 1 9.552011e-05 0.0001717918
HAB 8906 3 2.865603e-04 0.0005153754
HAB 895250 1 9.552011e-05 0.0001717918
HAB 9002 1 9.552011e-05 0.0001717918
HAB 9102 1 9.552011e-05 0.0001717918
HAB 9103 1 9.552011e-05 0.0001717918
HAB 9117 1 9.552011e-05 0.0001717918
HAB 9127 2 1.910402e-04 0.0003435836
HAB 9129 1 9.552011e-05 0.0001717918
HAB 9204 1 9.552011e-05 0.0001717918
HAB 9227 2 1.910402e-04 0.0003435836
HAB 9229 1 9.552011e-05 0.0001717918
[ reached 'max' / getOption("max.print") -- omitted 3774 rows ]
events$map_coordinate_clean <- events$map_coordinate %>% stringr::str_replace_all("[[:punct:]]| ", "")
(events$map_coordinate_clean_length <- events$map_coordinate_clean %>% nchar() ) %>% janitor::tabyl() %>% round(3)
. n percent valid_percent
0 1 0.000 0.000
3 10 0.001 0.002
4 110 0.011 0.019
5 11 0.001 0.002
6 29 0.003 0.005
7 3155 0.301 0.542
8 38 0.004 0.007
9 2414 0.231 0.415
10 6 0.001 0.001
11 10 0.001 0.002
12 1 0.000 0.000
13 2 0.000 0.000
14 12 0.001 0.002
15 1 0.000 0.000
16 3 0.000 0.001
18 7 0.001 0.001
19 3 0.000 0.001
20 1 0.000 0.000
27 5 0.000 0.001
28 1 0.000 0.000
35 1 0.000 0.000
NA 4648 0.444 NA
(events$map_coordinate_clean_text <- events$map_coordinate_clean %>% gsub("[0-9]", "\\1",.)) %>% janitor::tabyl() %>% mutate_if(is.numeric, round,2) #Split into a text component and numeric component
(events$map_coordinate_clean_number <- events$map_coordinate_clean %>% gsub("[A-Za-z]", "\\1", .) ) %>% janitor::tabyl() %>% mutate_if(is.numeric, round,2)
(events$map_coordinate_clean_text_band <- events$map_coordinate_clean_text %>% substring(1,1) ) %>% janitor::tabyl() %>% mutate_if(is.numeric, round,2)
(events$map_coordinate_clean_text_block <- events$map_coordinate_clean_text %>% substring(2,2) ) %>% janitor::tabyl() %>% mutate_if(is.numeric, round,2)
(events$map_coordinate_clean_text_subblock <- events$map_coordinate_clean_text %>% substring(3,3) ) %>% janitor::tabyl() %>% mutate_if(is.numeric, round,2)
(events$map_coordinate_clean_number_length <- events$map_coordinate_clean_number %>% nchar() ) %>% janitor::tabyl() %>% mutate_if(is.numeric, round,2)
(events$map_coordinate_clean_number_easting <- events$map_coordinate_clean_number %>%
substring(1, events$map_coordinate_clean_number_length/2) %>% as.numeric() ) %>%
janitor::tabyl() %>% mutate_if(is.numeric, round,2)
(events$map_coordinate_clean_number_northing <- events$map_coordinate_clean_number %>%
substring(events$map_coordinate_clean_number_length/2+1, events$map_coordinate_clean_number_length) %>%
as.numeric() ) %>%
janitor::tabyl() %>% mutate_if(is.numeric, round,2)
#
cat("\014")
print("Starting Converting Coordinates, may take some time")
for(i in 1:nrow(events)){
# print(i)
#The “exposition” pipe operator, %$% exposes the names within the left-hand side object to the right-hand side expression. Essentially, it is a short-hand for using the with functions
suppressMessages({
temp <- with(events[i,] ,
MeasuringLandscape:::EAGS2LatLong(band=map_coordinate_clean_text_band,
block=map_coordinate_clean_text_block,
subblock=map_coordinate_clean_text_subblock,
easting=map_coordinate_clean_number_easting ,
northing=map_coordinate_clean_number_northing)
)
events$map_coordinate_clean_latitude[i] <- temp$latitude
events$map_coordinate_clean_longitude[i] <- temp$longitude
})
#print(is.na(events$map_coordinate_clean_latitude[i]))
}
print("Finished Converting Coordinates")
#(temp <- events %>% mutate(map_coordinate_clean_row=1:n()) %>% filter(is.na(map_coordinate_clean_latitude) & !is.na(map_coordinate_clean)) %>% select(starts_with("map_coordinate_clean")) ) %>% distinct() %>% print(n=40) #visualize errors
#dim(temp) #195 coordinates don't convert.
testing=F
if(testing){
i=3684
events[i,] %>% select(starts_with("map_coordinate_clean")) %$% EAGS2LatLong(band=map_coordinate_clean_text_band,
block=map_coordinate_clean_text_block,
subblock=map_coordinate_clean_text_subblock,
easting=map_coordinate_clean_number_easting ,
northing=map_coordinate_clean_number_northing)
with(events[i,], map_coordinate_clean)
with(events[i,], map_coordinate)
band <- with(events[i,], map_coordinate_clean_text_band)
block <- with(events[i,],map_coordinate_clean_text_block)
subblock <- with(events[i,],map_coordinate_clean_text_subblock) #
easting <- with(events[i,],map_coordinate_clean_number_easting)
northing <- with(events[i,],map_coordinate_clean_number_northing)
}
stats::quantile(events$map_coordinate_clean_latitude, probs =c(.005,.01,.1,.5,.9,.99,.995), na.rm=T, type=9)
0.5% 1% 10% 50% 90% 99% 99.5%
-1.7190692 -1.5442831 -1.0403576 -0.4886414 0.0452442 0.7058103 2.2342679
stats::quantile(events$map_coordinate_clean_longitude, probs =c(.005,.01,.1,.5,.9,.99,.995), na.rm=T, type=9)
0.5% 1% 10% 50% 90% 99% 99.5%
35.67124 35.78910 36.36309 36.93511 37.45506 38.26145 39.87161
#plot(events$map_coordinate_clean_longitude,events$map_coordinate_clean_latitude) #plot with the outliers
#This is just to remove absolutely clear outliers. Not to set the region of interest.
#Outlier Bounding Box:
#NE 4.62933, 41.899059
#SW -4.71712, 33.90884
events$map_coordinate_clean_latitude[events$map_coordinate_clean_latitude < -4.71712 |
events$map_coordinate_clean_latitude>4.62933] <- NA
events$map_coordinate_clean_longitude[events$map_coordinate_clean_longitude < 33.90884 |
events$map_coordinate_clean_longitude>41.899059] <- NA
plot(events$map_coordinate_clean_longitude,events$map_coordinate_clean_latitude)
cat("\014")
#clean document district
events$document_district_clean <- events$document_district %>% stringi::stri_trans_totitle() %>% stringr::str_trim() %>% as.factor()
events$document_district_clean <- events$document_district_clean %>% forcats::fct_collapse(
'Embu'=c('Embu-Fort Hall Border'),
'Baringo'=c('BARINGO'),
'FORT HALL'=c('Fort Hall'),
'Naivasha'=c('Naviasha'),
'Nyeri' = c('Nyeri Settled Area'),
'Nyeri' = c('South Nyeri Reserve') ,
'Nairobi' = c('Jock Scott'),
NULL=c('',' ' ,'Document District','Kitui','Matathia', 'H/M','Reference Serial')
)
Unknown levels in f
: BARINGO
events$document_unit_type <- NA
condition <- events$document_district_clean %in% c("Rift Valley","Central Province"); table(condition)
condition FALSE TRUE 9278 1191
events$document_unit_type[condition] <- "Province"
#Jock Scott Nairobi City
condition <- events$document_district_clean %in% c("Nairobi"); table(condition)
condition FALSE TRUE 10075 394
events$document_unit_type[condition] <- "City"
condition <- events$document_district %in% c("JOCK SCOTT"); table(condition)
condition FALSE TRUE 10357 112
events$document_unit_type[condition] <- "Operation Jock Scott"
#Missing? Elgeyo/Marakwet
#Baringo, , Embu, Fort Hall, Kajiado, Kiambu, Kitui, Laikipia, Machakos, Meru, Naivasha, Nakuru, Nanyuki, Narok, Nyeri, Thika
condition <- events$document_district_clean %in% c("Baringo","Embu","Fort Hall","Kajiado","Kiambu",
"Laikipia","Machakos","Meru","Naivasha","Nakuru",
"Nanyuki","Narok","Nyeri","Thika"); table(condition)
condition FALSE TRUE 2808 7661
events$document_unit_type[condition] <- "District"
events$document_unit_type %>%
janitor::tabyl(sort = TRUE) %>%
janitor::adorn_crosstab(digits = 1)
‘janitor::adorn_crosstab’ is deprecated. Use ‘use the various adorn_ functions instead. See the “tabyl” vignette for examples.’ instead. See help(“Deprecated”)
events$document_district_clean %>%
janitor::tabyl(sort = TRUE) %>%
janitor::adorn_crosstab(digits = 1)
Factor dat
contains implicit NA, consider using forcats::fct_explicit_na
‘janitor::adorn_crosstab’ is deprecated. Use ‘use the various adorn_ functions instead. See the “tabyl” vignette for examples.’ instead. See help(“Deprecated”)
# Now we need to handle suffixes and combined locations
# "farm" now is followed by things because they crunched in additional location info at the end
## "coles estate farm
## agriculture experimental farm
## demonstration farm
## "farm near churo"
## reubens farm near churo
################################################
events$location_text_ruleclean <- events$location_text %>% stringr::str_trim() %>% tolower()
events <- events %>%
dplyr::select(-one_of("location_text_ruleclean_connector_prefix","location_text_ruleclean_connector_suffix")) %>% #this intentionally throws a warning
tidyr ::separate(col=location_text_ruleclean,
into=c("location_text_ruleclean_connector_prefix","location_text_ruleclean_connector_suffix"),
sep = " of | near ", remove=F, extra="drop", fill="right")
Unknown columns: `location_text_ruleclean_connector_prefix`, `location_text_ruleclean_connector_suffix`
events <- events %>% mutate(name_clean=stringr::str_trim(tolower(location_text))) %>%
mutate(name_clean_posessive=grepl("'s|`s",name_clean)) %>%
mutate(name_cleaner=trimws(name_clean) ) %>%
mutate(name_cleaner=gsub("'s|`s","",name_cleaner, fixed=T) ) %>%
mutate(name_cleaner= stringr::str_replace_all(name_cleaner, "[[:punct:]]|`", "") ) %>%
mutate(name_cleaner= stringr::str_replace_all(name_cleaner, "[^[:alnum:] ]", "") ) %>% #removes all the weird unicode and ascii
mutate(name_cleaner=trimws(name_cleaner) ) %>%
mutate(name_cleaner_nospace= stringr::str_replace_all(name_cleaner, " ", "") )
events_sf <- events %>% # filter(!is.na(longitude) & !is.na(latitude)) %>%
distinct() %>%
# filter( between(longitude, 30.0,45.0) ) %>% #Flag ROI but don't subset on it yet
# filter( between(latitude, -5.0,5.0) ) %>%
mutate(name_clean = stringr::str_trim(tolower(location_text))) %>%
mutate(name_clean_posessive = grepl("'s|`s", name_clean)) %>%
mutate(name_cleaner = trimws(name_clean)) %>%
mutate(name_cleaner = gsub("'s|`s", "", name_cleaner, fixed = T)) %>%
mutate(name_cleaner = stringr::str_replace_all(name_cleaner, "[[:punct:]]|`", "")) %>%
mutate(name_cleaner = trimws(name_cleaner)) %>%
mutate(name_cleaner_nospace = stringr::str_replace_all(name_cleaner, " ", ""))
# Avoid creating geometries where one of the two is NA
events_sf$map_coordinate_clean_longitude[is.na(events_sf$map_coordinate_clean_latitude)] <- NA
events_sf$map_coordinate_clean_latitude[is.na(events_sf$map_coordinate_clean_longitude)] <- NA
#events_sf$event_hash <- NULL #Make sure we're not hashing on the previous hash which might be a random walk
events_sf <- events_sf %>%
sf::st_as_sf(coords = c("map_coordinate_clean_longitude", "map_coordinate_clean_latitude"),
crs = 4326, agr = "constant", remove = F, na.fail = F) # %>%
#mutate(event_hash = apply(., 1, digest, algo="xxhash64") ) #Do this once and only once
valid <- sf::st_is_valid(events_sf$geometry); table(valid)
valid
TRUE
10469
eventsnames_sf <- events_sf %>%
select("name_cleaner", "geometry") %>%
setNames(c("name", "geometry")) %>%
mutate(source_dataset = "events")
saveRDS(events_sf, glue::glue(getwd(), "/../inst/extdata/events_sf.Rdata"))