Нужна помощь в сортировке данных по карте
Я ищу помощь в сортировке данных для карты.
Я пытаюсь создать карту хороплета с помощью R, используя почтовый индекс и случаи кражи со взломом.
Однако функция карты не работает должным образом, или, возможно, введенные мной данные не соответствуют нужному типу или что-то в этом роде.
Проблема в том, что когда я создаю карту, градиент цвета не выводится должным образом: он имеет неправильный порядок.
Вот как выглядит вывод в данный момент: https://imgur.com/cvKXPAQ
Вы можете видеть, что он насчитывает 1, 11, 12, 13, 14, 15, ... 2, 3, 4 ...
Я бы хотел посчитать от 1 (мин) до 15 (макс)
Практически все работает, вплоть до & за исключением последней функции (функция choropleth zipcode map, вызываемая для sortedData_BurglaryPerZip).
Спасибо за вашу помощь!
Вот код:
# # use the devtools package from CRAN to install choroplethrZip from github
# # install.packages("devtools")
# install_github('arilamstein/choroplethrZip')
# These are needed:
detach("package:plyr", unload=TRUE) # <- might not be necessary.
# Import & inspect 2014 neighborhood economic data
austin2014_data_raw <- read_csv('https://data.austintexas.gov/resource/hcnj-rei3.csv', na = '')
# # View(austin2014_data_raw)
# Clean it: Remove the first row, which has no zip code (this row is the average data for all of Austin)
austin2014_data <- austin2014_data_raw[-1,]
# # View(austin2014_data)
nrow(austin2014_data) # now there's two less rows. <-- Solution: on line above, check only by zipcode: na.omit(data_raw$Zipcode)
# or, get the alternative from Stackru
# Grab the zip code data separately
# (when I tried to grab it just by doing: austin2014_data$`Zip Code` the outputted column looks weird)
zipCodesOfData <- fread('https://data.austintexas.gov/resource/hcnj-rei3.csv') %>%
mutate(`Zip Code` = ifelse(`Zip Code` == "", NA, `Zip Code`)) %>%
na.omit() %>%
select(`Zip Code`)
# View(zipCodesOfData)
# Rename it from "Zip Code" to "ZipCode". Recombine it with the dataset.
names(zipCodesOfData) <- "ZipCode"
austin2014_data <- cbind(austin2014_data, zipCodesOfData)
# View(austin2014_data)
# Select a few columns for our neighborhood economic data subset
columnSelection <- c("ZipCode", "Population below poverty level", "Median household income", "Unemployment", "Median rent", "Percentage of rental units in poor condition")
austin2014_EconData_selection <- subset(austin2014_data, select=columnSelection)
# View(austin2014_EconData_selection)
# Reset row number index since we removed the first row (r kept the original index, now we're gonna reset it)
rownames(austin2014_EconData_selection) <- 1:nrow(austin2014_EconData_selection)
# View(austin2014_EconData_selection)
# Import crime data
# Import data
austinCrime2014_data_raw <- read_csv('https://data.austintexas.gov/resource/7g8v-xxja.csv', na = '')
# How many unique zipcodes?
length(unique(austinCrime2014_data_raw$`GO Location Zip`))
# # View(austinCrime2014_data_raw)
# Select and rename required columns
columnSelection_Crime <- c("GO Location Zip", "GO Highest Offense Desc", "Highest NIBRS/UCR Offense Description")
austinCrime_dataset <- select(austinCrime2014_data_raw, one_of(columnSelection_Crime))
names(austinCrime_dataset) <- c("zipcode", "highestOffenseDesc", "NIBRS_OffenseDesc")
# Filter crime data by zipcodes available in the neighborhood economic data subset
length(unique(austinCrime2014_data_raw$`GO Location Zip`)) # 36
austinCrime2014_data_selected_zips <- filter(austinCrime_dataset, zipcode %in% austin2014_EconData_selection$ZipCode)
length(unique(austinCrime2014_data_selected_zips$zipcode)) # 31
# Convert our crime data subset from string/char data into factorized data so we can see levels
# let's make the character data columns c("highestOffenseDesc", "NIBRS_OffenseDesc") into factors so we can check its levels
glimpse(austinCrime2014_data_selected_zips) # characters
cols <- c("highestOffenseDesc", "NIBRS_OffenseDesc") # columns with character datatype to convert to factor datatype
austinCrime2014_data_selected_zips[cols] <- lapply(austinCrime2014_data_selected_zips[cols], factor)
glimpse(austinCrime2014_data_selected_zips) # factors
# View(austinCrime2014_data_selected_zips)
levels(austinCrime2014_data_selected_zips$highestOffenseDesc) #--> looks good
levels(austinCrime2014_data_selected_zips$NIBRS_OffenseDesc) # output is weird: "Burglary / \nBreaking & Entering" "Robbery"
## Shortening the entry "Burglary / Breaking & Entering" to just "Burglary
columnWithStringReplaced<- gsub("Burglary / \nBreaking & Entering", "Burglary", austinCrime2014_data_selected_zips$NIBRS_OffenseDesc)
austinCrime2014_data_selected_zips <- cbind(austinCrime2014_data_selected_zips, columnWithStringReplaced)
austinCrime2014_data_selected_zips <- austinCrime2014_data_selected_zips[,-3] #remove old NIBRS column
# View(austinCrime2014_data_selected_zips)
names(austinCrime2014_data_selected_zips) <- c("zipcode", "highestOffenseDesc)", "NIBRS_OffenseDesc") # make all columns have the right same
# # View our data
# View(austin2014_EconData_selection)
# View(austinCrime2014_data_selected_zips)
levels(austinCrime2014_data_selected_zips$highestOffenseDesc) #--> looks good
levels(austinCrime2014_data_selected_zips$NIBRS_OffenseDesc) #--> looks good
# Next step: counting NIBRS crimes per zipcode
zipCrimeCountNIBRS <- austinCrime2014_data_selected_zips[,-2]
# This gives us count of NIBRS_OffenseDesc per Zipcode
zipCrimeCountNIBRS = zipCrimeCountNIBRS %>%
group_by(zipcode, NIBRS_OffenseDesc) %>%
mutate(occ = n())
# View(zipCrimeCountNIBRS)
# filter and group the crimes (burglary, robbery) by the zipcodes present
# this is using dplyr
rob_and_burg_perZip <- zipCrimeCountNIBRS %>%
group_by(zipcode, occ, NIBRS_OffenseDesc) %>%
summarise() %>%
select(zipcode=zipcode, occ, NIBRS_OffenseDesc)
# View(rob_and_burg_perZip)
# lets check unique zipcodes
length(unique(rob_and_burg_perZip$zipcode)) # 31 zipcodes present
print(nrow(rob_and_burg_perZip)/2) # but some don't have both crimes accounted for
# select out all zipcodes with robbery -- something like this:
# select zipcode, NIBRS_OffenseDesc
# where NIBRS_OffenseDesc = robbery
robberyPerZip <- filter(rob_and_burg_perZip, NIBRS_OffenseDesc == "Robbery")
# View(robberyPerZip) # has 30 rows
# need to diff the zips and tack on ones not shown
# I found this function here: http://stackru.com/questions/21574214/finding-elements-that-do-not-overlap-between-two-vectors
mysetdiff<-function (x, y, multiple=FALSE)
x <- as.vector(x)
y <- as.vector(y)
if (length(x) || length(y)) {
if (!multiple) {
unique( x[match(x, y, 0L) == 0L])
}else x[match(x, y, 0L) == 0L]
} else x
# This shows the one zipcode missing in robberyPerZip$zipcode
diff_zip_Robbery <- mysetdiff(rob_and_burg_perZip$zipcode, robberyPerZip$zipcode)
# need to create list: 1 row, 3 columns, then rbind it
# View(robberyPerZip)
list_to_append <- list(diff_zip_Robbery, 0, "Robbery")
names(list_to_append) <- c("zipcode", "occ", "NIBRS_OffenseDesc")
robberyPerZip_done <- rbind(robberyPerZip, list_to_append)
# View(robberyPerZip_done)
#### Same thing as last step, but with burglary instead of robbery:
# select out all zipcodes with burglary
burglaryPerZip <- filter(rob_and_burg_perZip, NIBRS_OffenseDesc == "Burglary")
# View(burglaryPerZip) # has 28 rows
# This shows the three zipcodes missing in burglaryPerZip$zipcode
diff_zip_Burglary <- mysetdiff(rob_and_burg_perZip$zipcode, burglaryPerZip$zipcode)
newMatrix <- cbind(diff_zip_Burglary, c(0,0,0), c("Burglary", "Burglary", "Burglary"))
colnames(newMatrix) <- c("zipcode", "occ", "NIBRS_OffenseDesc")
# View(newMatrix)
## make both (a matrix, newMatrix) and (a list, burglaryPerZip) into data frames in order to combine them
mat_df <- data.frame(newMatrix)
burg_df <- data.frame(burglaryPerZip)
# doesn't work b/c they're different object types:
# burglaryPerZip_done <- rbind(burglaryPerZip, newMatrix)
# but since we converted into dataframe, this works:
burglaryPerZip_done <- rbind(burg_df, mat_df)
burglaryPerZip_done <- as.list(burglaryPerZip_done) # --> convert back into a list
## Now we have a list of robberies and burglaries per zipcode, including those with zero per zipcode:
# View(burglaryPerZip_done)
# View(robberyPerZip_done)
# Need to provide list of 'region' (i.e. zipcode) and 'value' (i.e. numeric value to map)
## Burglary:
burglary_occuranceByZip <- burglaryPerZip_done[-3] #take off the crime description column
names(burglary_occuranceByZip) <- c("region", "value") # rename to region, value
burglary_occuranceByZip <- data.frame(burglary_occuranceByZip) # make it a dataframe
sortedData_BurglaryPerZip <- burglary_occuranceByZip[order(as.numeric(as.character(burglary_occuranceByZip$value))),] # order by value
## Robbery:
Robbery_occuranceByZip <- robberyPerZip_done[-3] #take off the crime description column
names(Robbery_occuranceByZip) <- c("region", "value") # rename to region, value
Robbery_occuranceByZip <- data.frame(Robbery_occuranceByZip) # make it a dataframe
sortedData_RobberyPerZip <- Robbery_occuranceByZip[order(as.numeric(as.character(Robbery_occuranceByZip$value))),] # order by value
# Now we're ready to map.
# First, let's map raw numbers of crimes per zipcode
# Then, let's divide crimes by population and map that as well.
# burglary_occuranceByZip$value <- as.numeric(as.character(sortedData_BurglaryPerZip$value))
# burglary_occuranceByZip$region <- as.numeric(as.character(sortedData_BurglaryPerZip$region))
# class(sortedData_BurglaryPerZip)
# class(sortedData_BurglaryPerZip$value)
# glimpse(sortedData_BurglaryPerZip)
# View(sortedData_BurglaryPerZip)
zip_zoom = sortedData_BurglaryPerZip$region,
title = "Burglary occurances by zipcode",
legend = "Burglaries",
) + coord_map()
## Robberies map:
sortedData_RobberyPerZip$region <- as.character(sortedData_RobberyPerZip$region)
zip_zoom = sortedData_RobberyPerZip$region,
title = "Burglary occurances by zipcode",
legend = "Burglaries",
) + coord_map()
## Burglaries map:
print(sortedData_BurglaryPerZip$value) # Value is sorted
sortedData_BurglaryPerZip$region <- as.character(sortedData_BurglaryPerZip$region)
sortedData_BurglaryPerZip$value <- as.character(sortedData_BurglaryPerZip$value)
zip_zoom = sortedData_BurglaryPerZip$region,
title = "Burglary occurances by zipcode",
legend = "Burglaries",
) + coord_map()
### Yet this map's legend and order of shading is out of order.