Syllabus
Tech ‘Stack’ R/RStudio/RMarkdown
What should the relationship between the public sector and ML/AI be? Think of a few words and put them in the chat.
Review Coding Warmup 0
library(tidyverse)
file_loc <- '~/../Downloads/'
ccao <- read_csv(str_glue(file_loc,
'Assessor__Archived_05-11-2022__-_Residential_Modeling_Characteristics__Chicago_.csv'))
str(ccao)
## spc_tbl_ [678,656 x 80] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Property Index Number : chr [1:678656] "17-09-244-018-1031" "19-01-102-023-0000" "14-32-227-010-0000" "20-06-401-024-0000" ...
## $ Assessment Triad : chr [1:678656] "City" "City" "City" "City" ...
## $ Property Class : num [1:678656] 299 203 205 211 203 212 212 205 299 211 ...
## $ Property Address : chr [1:678656] NA "2952 W 39TH PL" "2039 N FREMONT ST" "4359 S WOLCOTT AVE" ...
## $ Property Apartment Num. : chr [1:678656] NA NA NA NA ...
## $ Property City : chr [1:678656] NA "CHICAGO" "CHICAGO" "CHICAGO" ...
## $ Township Name : chr [1:678656] "North Chicago" "Lake" "North Chicago" "Lake" ...
## $ Township Code : num [1:678656] 74 72 74 72 71 71 70 77 74 72 ...
## $ Neighborhood Code : num [1:678656] 74030 72040 74012 72080 71430 ...
## $ Township Neighborhood Only : chr [1:678656] "030" "040" "012" "080" ...
## $ Modeling Group : chr [1:678656] "NCHARS" "SF" "SF" "MF" ...
## $ Proportion Assessed : num [1:678656] 0.00166 1 1 1 1 ...
## $ Key PIN : chr [1:678656] "17-09-244-018-1001" NA NA NA ...
## $ Multi Code Indicator : num [1:678656] 0 0 0 0 0 0 0 0 0 0 ...
## $ Multi Code : num [1:678656] 2 2 NA 2 2 NA NA NA 2 2 ...
## $ Number of Active 288s (HIEs) : num [1:678656] NA 0 0 0 0 0 0 0 NA 0 ...
## $ Number of Ended 288s (HIEs) : num [1:678656] NA 0 0 0 0 0 0 0 NA 0 ...
## $ Sale Date : chr [1:678656] NA NA NA NA ...
## $ Sale Price : num [1:678656] NA NA NA NA NA NA NA NA NA NA ...
## $ Sale Document Number : num [1:678656] NA NA NA NA NA NA NA NA NA NA ...
## $ Land Square Feet : num [1:678656] NA 3100 6250 2812 5953 ...
## $ Large Home Indicator : logi [1:678656] NA FALSE FALSE FALSE FALSE FALSE ...
## $ Age : num [1:678656] 19 96 132 126 65 69 97 77 17 100 ...
## $ Apartments : chr [1:678656] NA NA NA "Three" ...
## $ Wall Material : chr [1:678656] NA "Frame" NA "Frame" ...
## $ Roof Material : chr [1:678656] NA "Shingle + Asphalt" NA "Shingle + Asphalt" ...
## $ Rooms : num [1:678656] NA 4 0 20 5 0 0 0 NA 7 ...
## $ Bedrooms : num [1:678656] NA 2 0 10 2 0 0 0 NA 5 ...
## $ Basement : chr [1:678656] NA "Slab" NA "Full" ...
## $ Basement Finish : chr [1:678656] NA "Unfinished" NA "Apartment" ...
## $ Central Heating : chr [1:678656] NA "Warm Air Furnace" NA "None" ...
## $ Central Air Conditioning : chr [1:678656] NA "No Central A/C" NA "No Central A/C" ...
## $ Fireplaces : num [1:678656] NA 0 NA 0 0 NA NA NA NA 0 ...
## $ Attic Type : chr [1:678656] NA "None" NA "None" ...
## $ Full Baths : num [1:678656] NA 1 0 5 1 0 0 0 NA 3 ...
## $ Half Baths : num [1:678656] NA 0 0 0 0 0 0 0 NA 0 ...
## $ Design Plan : chr [1:678656] NA NA NA "Stock Plan" ...
## $ Cathedral Ceiling : chr [1:678656] NA NA NA NA ...
## $ Garage 1 Size : chr [1:678656] NA "0 cars" NA "0 cars" ...
## $ Garage 1 Material : chr [1:678656] NA NA NA NA ...
## $ Garage 1 Attached : logi [1:678656] NA NA NA NA FALSE NA ...
## $ Garage 1 Area : logi [1:678656] NA NA NA NA FALSE NA ...
## $ Building Square Feet : num [1:678656] NA 1044 0 4320 1098 ...
## $ Repair Condition : chr [1:678656] NA "Average" NA "Average" ...
## $ Use : chr [1:678656] NA "Single-Family" NA "Multi-Family" ...
## $ Type of Residence : chr [1:678656] NA "1 Story" NA "2 Story" ...
## $ Attic Finish : chr [1:678656] NA NA NA NA ...
## $ Renovation : chr [1:678656] NA NA NA NA ...
## $ Porch : chr [1:678656] NA NA NA NA ...
## $ Property Zip Code : chr [1:678656] NA "60632-1804" "60614-4363" "60609-3124" ...
## $ Tract GEOID : num [1:678656] 1.7e+10 1.7e+10 1.7e+10 1.7e+10 1.7e+10 ...
## $ Municipality FIPS Code : num [1:678656] 14000 14000 14000 14000 14000 14000 14000 14000 14000 14000 ...
## $ Municipality Name : chr [1:678656] "Chicago" "Chicago" "Chicago" "Chicago" ...
## $ O'Hare Noise Indicator : num [1:678656] 0 0 0 0 0 0 0 0 0 0 ...
## $ FEMA Floodplain : num [1:678656] 0 0 0 0 0 0 0 0 0 0 ...
## $ Flood Risk Factor : num [1:678656] 4 6 1 5 1 3 1 3 1 1 ...
## $ Flood Risk Direction : num [1:678656] 0 0 0 0 0 1 0 1 0 0 ...
## $ Road Proximity < 100 Feet : num [1:678656] 0 0 0 0 0 0 0 0 0 0 ...
## $ Road Proximity 101 - 300 Feet : num [1:678656] 0 0 0 0 0 0 1 0 1 0 ...
## $ Elementary/Middle School District : chr [1:678656] "OGDEN" "DAVIS, N" "MAYER" "LARA" ...
## $ High School District : chr [1:678656] "WELLS HS" "KELLY HS" "LINCOLN PARK HS" "RICHARDS HS" ...
## $ Longitude : num [1:678656] -87.6 -87.7 -87.7 -87.7 -87.8 ...
## $ Latitude : num [1:678656] 41.9 41.8 41.9 41.8 42 ...
## $ geoposition : chr [1:678656] "POINT (-87.6344132462695 41.8913692293796)" "POINT (-87.6992293705144 41.8220515067538)" "POINT (-87.6506234192346 41.9193306987512)" "POINT (-87.6720496981807 41.8141134023529)" ...
## $ Tax Rate : num [1:678656] 6.89 6.93 6.89 6.93 6.89 ...
## $ Tract Median Income : num [1:678656] 128243 39706 119432 33507 75601 ...
## $ Condo Model: Building PIN : chr [1:678656] "17-09-244-018" NA NA NA ...
## $ Condo Model: Modeled Unit Strata (of 10) : chr [1:678656] NA NA NA NA ...
## $ Condo Model: Modeled Unit Strata (of 300): chr [1:678656] NA NA NA NA ...
## $ Modeled Sale Year : num [1:678656] 2021 2021 2021 2021 2021 ...
## $ Modeled Sale Month of Year : chr [1:678656] "Jan" "Jan" "Jan" "Jan" ...
## $ Modeled Sale Day of Year : num [1:678656] 1 1 1 1 1 1 1 1 1 1 ...
## $ Modeled Sale Week of Year : num [1:678656] 1 1 1 1 1 1 1 1 1 1 ...
## $ Modeled Sale Quarter of Year : chr [1:678656] "Q1" "Q1" "Q1" "Q1" ...
## $ Modeled Sale Quarter : num [1:678656] 96 96 96 96 96 96 96 96 96 96 ...
## $ Modeled Sale Month : num [1:678656] 288 288 288 288 288 288 288 288 288 288 ...
## $ Modeled Sale Week : num [1:678656] 1252 1252 1252 1252 1252 ...
## $ Modeled Sale Day : num [1:678656] 8766 8766 8766 8766 8766 ...
## $ Modeled Sale During School Year : logi [1:678656] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ Modeled Sale During Holidays : logi [1:678656] TRUE TRUE TRUE TRUE TRUE TRUE ...
## - attr(*, "spec")=
## .. cols(
## .. `Property Index Number` = col_character(),
## .. `Assessment Triad` = col_character(),
## .. `Property Class` = col_double(),
## .. `Property Address` = col_character(),
## .. `Property Apartment Num.` = col_character(),
## .. `Property City` = col_character(),
## .. `Township Name` = col_character(),
## .. `Township Code` = col_double(),
## .. `Neighborhood Code` = col_double(),
## .. `Township Neighborhood Only` = col_character(),
## .. `Modeling Group` = col_character(),
## .. `Proportion Assessed` = col_double(),
## .. `Key PIN` = col_character(),
## .. `Multi Code Indicator` = col_double(),
## .. `Multi Code` = col_double(),
## .. `Number of Active 288s (HIEs)` = col_double(),
## .. `Number of Ended 288s (HIEs)` = col_double(),
## .. `Sale Date` = col_character(),
## .. `Sale Price` = col_double(),
## .. `Sale Document Number` = col_double(),
## .. `Land Square Feet` = col_double(),
## .. `Large Home Indicator` = col_logical(),
## .. Age = col_double(),
## .. Apartments = col_character(),
## .. `Wall Material` = col_character(),
## .. `Roof Material` = col_character(),
## .. Rooms = col_double(),
## .. Bedrooms = col_double(),
## .. Basement = col_character(),
## .. `Basement Finish` = col_character(),
## .. `Central Heating` = col_character(),
## .. `Central Air Conditioning` = col_character(),
## .. Fireplaces = col_double(),
## .. `Attic Type` = col_character(),
## .. `Full Baths` = col_double(),
## .. `Half Baths` = col_double(),
## .. `Design Plan` = col_character(),
## .. `Cathedral Ceiling` = col_character(),
## .. `Garage 1 Size` = col_character(),
## .. `Garage 1 Material` = col_character(),
## .. `Garage 1 Attached` = col_logical(),
## .. `Garage 1 Area` = col_logical(),
## .. `Building Square Feet` = col_double(),
## .. `Repair Condition` = col_character(),
## .. Use = col_character(),
## .. `Type of Residence` = col_character(),
## .. `Attic Finish` = col_character(),
## .. Renovation = col_character(),
## .. Porch = col_character(),
## .. `Property Zip Code` = col_character(),
## .. `Tract GEOID` = col_double(),
## .. `Municipality FIPS Code` = col_double(),
## .. `Municipality Name` = col_character(),
## .. `O'Hare Noise Indicator` = col_double(),
## .. `FEMA Floodplain` = col_double(),
## .. `Flood Risk Factor` = col_double(),
## .. `Flood Risk Direction` = col_double(),
## .. `Road Proximity < 100 Feet` = col_double(),
## .. `Road Proximity 101 - 300 Feet` = col_double(),
## .. `Elementary/Middle School District` = col_character(),
## .. `High School District` = col_character(),
## .. Longitude = col_double(),
## .. Latitude = col_double(),
## .. geoposition = col_character(),
## .. `Tax Rate` = col_double(),
## .. `Tract Median Income` = col_double(),
## .. `Condo Model: Building PIN` = col_character(),
## .. `Condo Model: Modeled Unit Strata (of 10)` = col_character(),
## .. `Condo Model: Modeled Unit Strata (of 300)` = col_character(),
## .. `Modeled Sale Year` = col_double(),
## .. `Modeled Sale Month of Year` = col_character(),
## .. `Modeled Sale Day of Year` = col_double(),
## .. `Modeled Sale Week of Year` = col_double(),
## .. `Modeled Sale Quarter of Year` = col_character(),
## .. `Modeled Sale Quarter` = col_double(),
## .. `Modeled Sale Month` = col_double(),
## .. `Modeled Sale Week` = col_double(),
## .. `Modeled Sale Day` = col_double(),
## .. `Modeled Sale During School Year` = col_logical(),
## .. `Modeled Sale During Holidays` = col_logical()
## .. )
## - attr(*, "problems")=<externalptr>
ccao %>% glimpse()
## Rows: 678,656
## Columns: 80
## $ `Property Index Number` <chr> "17-09-244-018-1031", "19-~
## $ `Assessment Triad` <chr> "City", "City", "City", "C~
## $ `Property Class` <dbl> 299, 203, 205, 211, 203, 2~
## $ `Property Address` <chr> NA, "2952 W 39TH PL", "203~
## $ `Property Apartment Num.` <chr> NA, NA, NA, NA, NA, NA, NA~
## $ `Property City` <chr> NA, "CHICAGO", "CHICAGO", ~
## $ `Township Name` <chr> "North Chicago", "Lake", "~
## $ `Township Code` <dbl> 74, 72, 74, 72, 71, 71, 70~
## $ `Neighborhood Code` <dbl> 74030, 72040, 74012, 72080~
## $ `Township Neighborhood Only` <chr> "030", "040", "012", "080"~
## $ `Modeling Group` <chr> "NCHARS", "SF", "SF", "MF"~
## $ `Proportion Assessed` <dbl> 0.0016613, 1.0000000, 1.00~
## $ `Key PIN` <chr> "17-09-244-018-1001", NA, ~
## $ `Multi Code Indicator` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ `Multi Code` <dbl> 2, 2, NA, 2, 2, NA, NA, NA~
## $ `Number of Active 288s (HIEs)` <dbl> NA, 0, 0, 0, 0, 0, 0, 0, N~
## $ `Number of Ended 288s (HIEs)` <dbl> NA, 0, 0, 0, 0, 0, 0, 0, N~
## $ `Sale Date` <chr> NA, NA, NA, NA, NA, NA, NA~
## $ `Sale Price` <dbl> NA, NA, NA, NA, NA, NA, NA~
## $ `Sale Document Number` <dbl> NA, NA, NA, NA, NA, NA, NA~
## $ `Land Square Feet` <dbl> NA, 3100, 6250, 2812, 5953~
## $ `Large Home Indicator` <lgl> NA, FALSE, FALSE, FALSE, F~
## $ Age <dbl> 19, 96, 132, 126, 65, 69, ~
## $ Apartments <chr> NA, NA, NA, "Three", NA, N~
## $ `Wall Material` <chr> NA, "Frame", NA, "Frame", ~
## $ `Roof Material` <chr> NA, "Shingle + Asphalt", N~
## $ Rooms <dbl> NA, 4, 0, 20, 5, 0, 0, 0, ~
## $ Bedrooms <dbl> NA, 2, 0, 10, 2, 0, 0, 0, ~
## $ Basement <chr> NA, "Slab", NA, "Full", "F~
## $ `Basement Finish` <chr> NA, "Unfinished", NA, "Apa~
## $ `Central Heating` <chr> NA, "Warm Air Furnace", NA~
## $ `Central Air Conditioning` <chr> NA, "No Central A/C", NA, ~
## $ Fireplaces <dbl> NA, 0, NA, 0, 0, NA, NA, N~
## $ `Attic Type` <chr> NA, "None", NA, "None", "N~
## $ `Full Baths` <dbl> NA, 1, 0, 5, 1, 0, 0, 0, N~
## $ `Half Baths` <dbl> NA, 0, 0, 0, 0, 0, 0, 0, N~
## $ `Design Plan` <chr> NA, NA, NA, "Stock Plan", ~
## $ `Cathedral Ceiling` <chr> NA, NA, NA, NA, "No", NA, ~
## $ `Garage 1 Size` <chr> NA, "0 cars", NA, "0 cars"~
## $ `Garage 1 Material` <chr> NA, NA, NA, NA, "Frame", N~
## $ `Garage 1 Attached` <lgl> NA, NA, NA, NA, FALSE, NA,~
## $ `Garage 1 Area` <lgl> NA, NA, NA, NA, FALSE, NA,~
## $ `Building Square Feet` <dbl> NA, 1044, 0, 4320, 1098, 0~
## $ `Repair Condition` <chr> NA, "Average", NA, "Averag~
## $ Use <chr> NA, "Single-Family", NA, "~
## $ `Type of Residence` <chr> NA, "1 Story", NA, "2 Stor~
## $ `Attic Finish` <chr> NA, NA, NA, NA, NA, NA, NA~
## $ Renovation <chr> NA, NA, NA, NA, NA, NA, NA~
## $ Porch <chr> NA, NA, NA, NA, NA, NA, NA~
## $ `Property Zip Code` <chr> NA, "60632-1804", "60614-4~
## $ `Tract GEOID` <dbl> 17031081800, 17031580501, ~
## $ `Municipality FIPS Code` <dbl> 14000, 14000, 14000, 14000~
## $ `Municipality Name` <chr> "Chicago", "Chicago", "Chi~
## $ `O'Hare Noise Indicator` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ `FEMA Floodplain` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ `Flood Risk Factor` <dbl> 4, 6, 1, 5, 1, 3, 1, 3, 1,~
## $ `Flood Risk Direction` <dbl> 0, 0, 0, 0, 0, 1, 0, 1, 0,~
## $ `Road Proximity < 100 Feet` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ `Road Proximity 101 - 300 Feet` <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 1,~
## $ `Elementary/Middle School District` <chr> "OGDEN", "DAVIS, N", "MAYE~
## $ `High School District` <chr> "WELLS HS", "KELLY HS", "L~
## $ Longitude <dbl> -87.63441, -87.69923, -87.~
## $ Latitude <dbl> 41.89137, 41.82205, 41.919~
## $ geoposition <chr> "POINT (-87.6344132462695 ~
## $ `Tax Rate` <dbl> 6.890000, 6.934000, 6.8900~
## $ `Tract Median Income` <dbl> 128243, 39706, 119432, 335~
## $ `Condo Model: Building PIN` <chr> "17-09-244-018", NA, NA, N~
## $ `Condo Model: Modeled Unit Strata (of 10)` <chr> NA, NA, NA, NA, NA, NA, NA~
## $ `Condo Model: Modeled Unit Strata (of 300)` <chr> NA, NA, NA, NA, NA, NA, NA~
## $ `Modeled Sale Year` <dbl> 2021, 2021, 2021, 2021, 20~
## $ `Modeled Sale Month of Year` <chr> "Jan", "Jan", "Jan", "Jan"~
## $ `Modeled Sale Day of Year` <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1,~
## $ `Modeled Sale Week of Year` <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1,~
## $ `Modeled Sale Quarter of Year` <chr> "Q1", "Q1", "Q1", "Q1", "Q~
## $ `Modeled Sale Quarter` <dbl> 96, 96, 96, 96, 96, 96, 96~
## $ `Modeled Sale Month` <dbl> 288, 288, 288, 288, 288, 2~
## $ `Modeled Sale Week` <dbl> 1252, 1252, 1252, 1252, 12~
## $ `Modeled Sale Day` <dbl> 8766, 8766, 8766, 8766, 87~
## $ `Modeled Sale During School Year` <lgl> TRUE, TRUE, TRUE, TRUE, TR~
## $ `Modeled Sale During Holidays` <lgl> TRUE, TRUE, TRUE, TRUE, TR~
# count property classes
ccao %>% count(`Property Class`)
ccao %>% group_by(`Property Class`) %>%
summarize(n = n())
ccao %>% count(`Property Class`, sort=TRUE)
count(ccao, `Property Class`)
# top five classes
ccao %>% count(`Property Class`) %>%
slice_max(order_by = `n`, n=5)
# keep only townships with Chicago in the name
ccao %>% distinct(`Township Name`)
ccao %>% filter(str_detect(`Township Name`, '(?<=g)o')) %>%
count(`Township Name`)
# average rooms by property class
ccao %>% group_by(`Property Class`) %>%
summarize(avg_rooms = mean(Rooms),
count = n()) %>% view()
# lubridate
library(lubridate)
ccao %>% select(`Sale Date`)
ccao %>% select(contains('Date'))
ccao %>% count(`Sale Date`, sort=TRUE)
ccao <- ccao %>%
mutate(
`Sale Date` = mdy_hms(`Sale Date`)
)
ccao %>% count(`Sale Date`)
# count sales in January 2019
ccao %>% filter(
month(`Sale Date`) == 1 & year(`Sale Date`) == 2019
)
ccao %>% mutate(
new_date = floor_date(`Sale Date`, 'month')
) %>% select(new_date) %>% filter(
!is.na(new_date),
new_date == ymd_hms('2019-01-01 00:00:00'))
# count sales by month
ccao %>% group_by(
month = floor_date(`Sale Date`, 'month')
) %>% summarize(
count = n()
)
# ggplot2
library(ggplot2)
theme_set(theme_bw())
# sales by week
gdata <- ccao %>%
filter(!is.na(`Sale Date`)) %>%
group_by(week = floor_date(`Sale Date`, 'week')) %>%
summarize(count = n())
ggplot(gdata, aes(x=week, y=count)) +
geom_point() +
geom_line()
ggplot(gdata, aes(x=week, y=count)) +
geom_point(size=2) + geom_line(linewidth=1.5) +
scale_x_datetime(date_breaks = '6 months', date_labels='%m/%d/%y') +
labs(x='Week of', y='Sales', title='Sales in Chicago by Week')
# sale price density
library(scales)
ggplot(ccao, aes(x=`Sale Price`)) +
geom_density(fill='navy')
ggplot(ccao, aes(x=`Sale Price`)) +
geom_density(fill='navy') +
scale_x_log10(labels=dollar_format())
ggplot(ccao, aes(x=`Township Name`, y=`Sale Price`)) +
geom_boxplot(fill='navy') +
scale_y_log10(labels=dollar_format()) +
coord_flip()