Check the page’s terms of service first, and then verify with {robotstxt}
Event: on_not_found
Event: on_file_type_mismatch
Event: on_suspect_content
[robots.txt]
--------------------------------------
# robots.txt overwrite by: on_suspect_content
User-agent: *
Allow: /
[events]
--------------------------------------
requested: https://global.rstudio.com/student/all_events/robots.txt
downloaded: https://global.rstudio.com/student/all_events/robots.txt
$on_not_found
$on_not_found$status_code
[1] 404
$on_file_type_mismatch
$on_file_type_mismatch$content_type
[1] "text/html; charset=utf-8"
$on_suspect_content
$on_suspect_content$parsable
[1] FALSE
$on_suspect_content$content_suspect
[1] TRUE
[attributes]
--------------------------------------
problems, cached, request, class
It is good practice to check that the scrape works on a single page first and then, if it does, scale up to the remaining pages.
# scraping event titles --
# Fetch one page of the all-events listing and return its event titles.
#
# page_number: value appended to the `?page=` query string of the listing URL.
# Returns a character vector holding the text of every `.session__name` node
# found on that page.
get_titles <- function(page_number) {
  # wait two seconds before each request so page fetches are spaced out
  Sys.sleep(2)
  link <- paste0(
    "https://global.rstudio.com/student/all_events?page=",
    page_number
  )
  read_html(link) %>%
    html_nodes(".session__name") %>%
    html_text()
}
# scraping event dates and times --
# Fetch one page of the all-events listing and return its date-time strings.
#
# page_number: value appended to the `?page=` query string of the listing URL.
# Returns a character vector holding the text of every node matching
# `.session__dates.session__dates--index` on that page.
get_dates <- function(page_number) {
  # wait two seconds before each request so page fetches are spaced out
  Sys.sleep(2)
  link <- paste0(
    "https://global.rstudio.com/student/all_events?page=",
    page_number
  )
  read_html(link) %>%
    html_nodes(".session__dates.session__dates--index") %>%
    html_text()
}
[1] "Discussion: Modelling 2 - Session 1"
[2] "Your Public Garden - Session 1"
[3] "Q&A: Vicki Boykis - Session 1"
[4] "Introducing xrprof: A New Way to Profile R - Session 1"
[5] "The Opioid Files: Turning big pharmacy data over to the public - Session 1"
[6] "Not The App We Deserve. The App We Need: Putting a GMP Shiny App into Production - Session 1"
[7] "plumber + future: Async Web APIs - Session 1"
[8] "rKenyaCensus Package - Session 1"
[9] "How we made the switch: a case study on automating a complex report. - Session 1"
[10] "Making Shiny apps faster with caching - Session 1"
[11] "Discussion: Data for good 1 - Session 1"
[12] "Discussion: Programming 1 - Session 1"
[13] "Discussion: Organisational tooling 1 - Session 1"
[14] "Humanitarian Data Science with R - Session 1"
[15] "Bigger Data With Ease Using Apache Arrow - Session 1"
[16] "From Zero to Hero: Best practices for setting up Rstudio Team in the Cloud - Session 1"
[17] "How reproducible am I? A retrospective on a year of commercial data science projects in R - Session 1"
[18] "Cognitive speed: How the Tidyverse helped the British Red Cross respond quickly to COVID-19 - Session 1"
[19] "Easy larger-than-RAM data manipulation with {disk.frame} - Session 1"
[20] "Lifelong Learning with R Weekly - Session 1"
[21] "xaringan Playground: Using xaringan to learn web development - Session 1"
[22] "Custom theming in Shiny & R Markdown with bslib & thematic - Session 1"
[23] "parsermd - parsing R Markdown for fun and profit - Session 1"
[24] "How Content Makes the Data Go 'Round - Session 1"
This step tries out the get_titles()
function on pages 2, 3, and 4.
[[1]]
[1] "Q&A: John Burn-Murdoch - Session 1"
[2] "oRganization: How to make internal R packages part of your team - Session 1"
[3] "Making the jump from learning to applying: R training and documentation for different levels of expertise - Session 1"
[4] "What's new in tidymodels? - Session 1"
[5] "Using R to Up Your Experimentation Game - Session 1"
[6] "You're Already Ready: Zen and the Art of R Package Development - Session 1"
[7] "On programming, teaching, and building interactive tutorials with learnr:: - Session 1"
[8] "Discussion: Modelling 1 - Session 1"
[9] "Discussion: Package dev 1 - Session 1"
[10] "Discussion: Teaching 1 - Session 1"
[11] "Fairness and Data Science: Failures, Factors, and Futures - Session 1"
[12] "Monitoring health and impact of open-source projects - Session 1"
[13] "Feedback at scale - Session 1"
[14] "Make a package - Make some friends - Session 1"
[15] "How to do things with words: learning to program in R with a \"communicative approach\" - Session 1"
[16] "tidymodels/stacks, Or, In Preparation for Pesto: A Grammar for Stacked Ensemble Modeling - Session 1"
[17] "Using formr to create R-powered surveys with individualized feedback - Session 1"
[18] "Using Guided Simulation Exercises to Teach Data Science with R - Session 1"
[19] "Towards an integrated {verse}: lessons learned developing a library of validated packages - Session 1"
[20] "How I became a Data Composer – examples of simulated datasets that bring value to a data-driven company - Session 1"
[21] "The Power of Great Datasets - Session 1"
[22] "Discussion: Teaching 2 - Session 1"
[23] "Categorical Embeddings: New Ways to Simplify Complex Data - Session 1"
[24] "Discussion: Package dev 2 - Session 1"
[[2]]
[1] "Discussion: Modelling 2 - Session 1"
[2] "Your Public Garden - Session 1"
[3] "Q&A: Vicki Boykis - Session 1"
[4] "Introducing xrprof: A New Way to Profile R - Session 1"
[5] "The Opioid Files: Turning big pharmacy data over to the public - Session 1"
[6] "Not The App We Deserve. The App We Need: Putting a GMP Shiny App into Production - Session 1"
[7] "plumber + future: Async Web APIs - Session 1"
[8] "rKenyaCensus Package - Session 1"
[9] "How we made the switch: a case study on automating a complex report. - Session 1"
[10] "Making Shiny apps faster with caching - Session 1"
[11] "Discussion: Data for good 1 - Session 1"
[12] "Discussion: Programming 1 - Session 1"
[13] "Discussion: Organisational tooling 1 - Session 1"
[14] "Humanitarian Data Science with R - Session 1"
[15] "Bigger Data With Ease Using Apache Arrow - Session 1"
[16] "From Zero to Hero: Best practices for setting up Rstudio Team in the Cloud - Session 1"
[17] "How reproducible am I? A retrospective on a year of commercial data science projects in R - Session 1"
[18] "Cognitive speed: How the Tidyverse helped the British Red Cross respond quickly to COVID-19 - Session 1"
[19] "Easy larger-than-RAM data manipulation with {disk.frame} - Session 1"
[20] "Lifelong Learning with R Weekly - Session 1"
[21] "xaringan Playground: Using xaringan to learn web development - Session 1"
[22] "Custom theming in Shiny & R Markdown with bslib & thematic - Session 1"
[23] "parsermd - parsing R Markdown for fun and profit - Session 1"
[24] "How Content Makes the Data Go 'Round - Session 1"
[[3]]
[1] "Discussion: Data for good 2 - Session 1"
[2] "Designing Randomized Studies using Shiny - Session 1"
[3] "Discussion: Organisational tooling 2 - Session 1"
[4] "Discussion: Programming 2 - Session 1"
[5] "Maintaining the house the tidyverse built - Session 2"
[6] "Q&A: Hadley Wickham - Session 2"
[7] "Accessible Data Science Beyond Visual Models: Non-Visual Interactions with R and RStudio Packages - Session 2"
[8] "Always look on the bright side of plots - Session 2"
[9] "Bringing the Tidyverse to Python with Siuba - Session 2"
[10] "A New Paradigm for Multifigure, Coordinate-Based Plotting in R - Session 2"
[11] "R & Python: Going Steady - Session 2"
[12] "Starting an R Book Club: Cooking Up Friendships in Isolation - Session 2"
[13] "Discussion: Learning 1 - Session 2"
[14] "Discussion: Visualisation 1 - Session 2"
[15] "Discussion: Language interop 1 - Session 2"
[16] "Art Lessons: One Year as RStudio’s Artist-in-Residence - Session 2"
[17] "Using pins with Python and JavaScript - Session 2"
[18] "Trial and Error in Data Viz at the ACLU - Session 2"
[19] "Racial Equity Dashboard: Unpacking Systemic Inequity - Session 2"
[20] "The dynamic duo: SQL & R - Session 2"
[21] "Aesthetically automated figure production - Session 2"
[22] "Your R is My R too: Reflections on creating the Mi-R community - Session 2"
[23] "An easy and friendly way to build your multilingual website - Session 2"
[24] "Making .pot-ery with R: Translations in R Packages - Session 2"
# scrape pages 1 through 7 and flatten each list of per-page results into a
# single character vector --
# titles --
titles_all <- unlist(map(1:7, get_titles))
# dates --
dates_all <- unlist(map(1:7, get_dates))
# combine the two scraped vectors column-wise into one tibble --
schedule <- tibble(
  event_name = titles_all,
  date_time = dates_all
)
# print the result for inspection
schedule
# regular expressions used to take the scraped date-time strings apart --
# NOTE(review): the raw strings presumably look like
# "<weekday>, <month> <day>, <year> at <start> to <end> EST" -- confirm
# against the scraped `date_time` column.
str_at <- "\\s+at\\s+"    # the word "at" with surrounding whitespace
str_to <- "\\s+to\\s+"    # the word "to" with surrounding whitespace
str_EST <- " EST"         # time-zone suffix that gets stripped
str_day <- "\\w+\\,\\s+"  # a word followed by a comma and whitespace
# wrangling the date-time strings --
schedule_new_times <-
  schedule %>%
  # collapse "<start> to <end>" into "<start>-<end>" and drop the " EST" suffix
  mutate(
    date = str_replace_all(date_time, pattern = str_to, replacement = "-"),
    date = str_replace_all(date, pattern = str_EST, replacement = "")
  ) %>%
  # split into the part before "at" and the time range after it
  tidyr::separate(col = date, into = c("day_date", "time"), sep = str_at) %>%
  mutate(
    # remove the first "<word>, " match, then parse what is left as
    # month-day-year
    date = str_replace(day_date, pattern = str_day, replacement = ""),
    date = lubridate::mdy(date, tz = "US/Eastern"),
    date_time_new = str_c(date, time, sep = " ")
  ) %>%
  # split the time range at the "-" inserted above
  tidyr::separate(col = time, into = c("start_time", "end_time"), sep = "-")
# building start and end date-times --
schedule_new <-
  schedule_new_times %>%
  mutate(
    # paste the parsed date onto each clock time, then parse the combined
    # string as a date-time in the US/Eastern zone
    start_datetime = lubridate::ymd_hm(
      str_c(date, start_time, sep = " "),
      tz = "US/Eastern"
    ),
    end_datetime = lubridate::ymd_hm(
      str_c(date, end_time, sep = " "),
      tz = "US/Eastern"
    )
  ) %>%
  # drop the intermediate string columns that are no longer needed
  select(-date_time, -day_date, -date_time_new)
# writing to CSV and RDS files --
# create the output folder if it is missing; neither writer creates
# directories, so a fresh checkout without the folder would otherwise fail
dir.create(here("schedule-files"), showWarnings = FALSE, recursive = TRUE)
write_csv(schedule_new,
          here("schedule-files", "schedule_new_EST.csv"))
saveRDS(schedule_new,
        here("schedule-files", "schedule_new_EST.Rds"))
# converting the schedule to the time zone of the machine running the script --
timezone <- Sys.timezone()
# Sys.timezone() can return NA when the zone cannot be determined; keep the
# times in US/Eastern (the zone they were parsed in) rather than passing NA on
if (is.na(timezone)) {
  warning("Could not determine the local time zone; keeping US/Eastern.",
          call. = FALSE)
  timezone <- "US/Eastern"
}
schedule_new_timezone <- schedule_new %>%
  mutate(
    # with_tz() changes the zone the times are displayed in
    start_datetime = lubridate::with_tz(start_datetime, tzone = timezone),
    end_datetime = lubridate::with_tz(end_datetime, tzone = timezone)
  )
# writing to CSV and RDS files --
# create the output folder if it is missing; neither writer creates directories
dir.create(here("schedule-files"), showWarnings = FALSE, recursive = TRUE)
write_csv(schedule_new_timezone,
          here("schedule-files", "schedule_new_localtz.csv"))
saveRDS(schedule_new_timezone,
        here("schedule-files", "schedule_new_localtz.Rds"))
# creating a function --
# Build one calendar event from a single row of the schedule.
#
# event:         row index into `schedule_data`.
# schedule_data: data frame with `start_datetime`, `end_datetime` and
#                `event_name` columns. Defaults to the global
#                `schedule_new_timezone`, so one-argument calls behave exactly
#                as before.
# Returns the object created by calendar::ic_event() for that row.
make_calendar <- function(event, schedule_data = schedule_new_timezone) {
  event_subset <- schedule_data[event, ]
  calendar::ic_event(
    start_time = event_subset$start_datetime,
    end_time = event_subset$end_datetime,
    summary = event_subset$event_name
  )
}
number_events <- nrow(schedule_new_timezone)
# creating ics objects for all events --
# seq_len() gives an empty sequence when there are no events, whereas
# 1:number_events would run over c(1, 0) and index rows that do not exist
events_all <- map(seq_len(number_events), make_calendar) %>%
  bind_rows()
# writing to .ics file --
# create the output folder if it is missing so the write does not fail on a
# fresh checkout
dir.create(here("calendar-files-ics"), showWarnings = FALSE, recursive = TRUE)
calendar::ic_write(events_all,
                   here("calendar-files-ics", "all_events_localtime.ics"))
processing file: rstudio-global-2021-calendar.Rmd
|
| | 0%
|
|.. | 3%
|
|..... | 7%
|
|....... | 10%
|
|......... | 14%
|
|........... | 17%
|
|.............. | 21%
|
|................ | 24%
|
|.................. | 28%
|
|.................... | 31%
|
|....................... | 34%
|
|......................... | 38%
|
|........................... | 41%
|
|.............................. | 45%
|
|................................ | 48%
|
|.................................. | 52%
|
|.................................... | 55%
|
|....................................... | 59%
|
|......................................... | 62%
|
|........................................... | 66%
|
|.............................................. | 69%
|
|................................................ | 72%
|
|.................................................. | 76%
|
|.................................................... | 79%
|
|....................................................... | 83%
|
|......................................................... | 86%
|
|........................................................... | 90%
|
|............................................................. | 93%
|
|................................................................ | 97%
|
|..................................................................| 100%
output file: rstudio-global-2021-calendar.R
[1] "rstudio-global-2021-calendar.R"