# # # # # # # # # # # # # # # # # # # # #

install.packages("tidyverse", repos = "http://cran.us.r-project.org")

## 
## The downloaded binary packages are in
##  /var/folders/3j/tlpxgbnn03s1zb0rcz4xx9j80000gn/T//RtmpvwAzt4/downloaded_packages

install.packages("lubridate", repos = "http://cran.us.r-project.org")

## 
## The downloaded binary packages are in
##  /var/folders/3j/tlpxgbnn03s1zb0rcz4xx9j80000gn/T//RtmpvwAzt4/downloaded_packages

install.packages("ggplot2", repos = "http://cran.us.r-project.org")

## 
## The downloaded binary packages are in
##  /var/folders/3j/tlpxgbnn03s1zb0rcz4xx9j80000gn/T//RtmpvwAzt4/downloaded_packages

install.packages("dplyr", repos = "http://cran.us.r-project.org")

## 
## The downloaded binary packages are in
##  /var/folders/3j/tlpxgbnn03s1zb0rcz4xx9j80000gn/T//RtmpvwAzt4/downloaded_packages

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(lubridate)

## Loading required package: timechange
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(ggplot2)
library(dplyr)
getwd()

## [1] "/Users/rising.volkan007/Desktop/q1_2020"

setwd("/Users/rising.volkan007/Desktop/q1_2020")

STEP 1: COLLECTED DATA

Uploaded Divvy datasets for the year of 2022 (csv files) here

q1_2022 <- read_csv("202210-divvy-tripdata.csv", show_col_types = FALSE)
q2_2022 <- read_csv("202209-divvy-tripdata.csv", show_col_types = FALSE)
q3_2022 <- read_csv("202208-divvy-tripdata.csv", show_col_types = FALSE)
q4_2022 <- read_csv("202207-divvy-tripdata.csv", show_col_types = FALSE)
q5_2022 <- read_csv("202206-divvy-tripdata.csv", show_col_types = FALSE)
q6_2022 <- read_csv("202205-divvy-tripdata.csv", show_col_types = FALSE)
q7_2022 <- read_csv("202204-divvy-tripdata.csv", show_col_types = FALSE)
q8_2022 <- read_csv("202203-divvy-tripdata.csv", show_col_types = FALSE)
q9_2022 <- read_csv("202201-divvy-tripdata.csv", show_col_types = FALSE)

STEP 2: WRANGLE DATA AND COMBINE INTO A SINGLE FILE

Compared column names each of the files

While the names don’t have to be in the same order, they DO need to match perfectly before we can use a command to join them into one file

colnames(q1_2022)

##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"

colnames(q2_2022)

##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"

colnames(q3_2022)

##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"

colnames(q4_2022)

##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"

colnames(q5_2022)

##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"

colnames(q6_2022)

##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"

Inspected the dataframes and look for incongruencies

Converted ride_id and rideable_type to character so that they can stack correctly

q1_2022 <- mutate(q1_2022, ride_id= as.character(ride_id), rideable_type=as.character(rideable_type))

q2_2022 <- mutate(q2_2022, ride_id= as.character(ride_id), rideable_type=as.character(rideable_type))

q3_2022 <- mutate(q3_2022, ride_id= as.character(ride_id), rideable_type=as.character(rideable_type))

q4_2022 <- mutate(q4_2022, ride_id= as.character(ride_id), rideable_type=as.character(rideable_type))

q5_2022 <- mutate(q5_2022, ride_id= as.character(ride_id), rideable_type=as.character(rideable_type))

q6_2022 <- mutate(q6_2022, ride_id= as.character(ride_id), rideable_type=as.character(rideable_type))

q7_2022 <- mutate(q7_2022, ride_id= as.character(ride_id), rideable_type=as.character(rideable_type))

q8_2022 <- mutate(q8_2022, ride_id= as.character(ride_id), rideable_type=as.character(rideable_type))

q9_2022 <- mutate(q9_2022, ride_id= as.character(ride_id), rideable_type=as.character(rideable_type))

Stack individual quarter’s data frames into one big data frame

all_trips <- bind_rows(q1_2022, q2_2022, q3_2022, q4_2022,q5_2022,q6_2022,q7_2022,q8_2022,q9_2022)

Removed lat, long data was dropped beginning in 2022

all_trips <- all_trips %>% 
  select(-c(start_lat,start_lng,end_lat,end_lng))

STEP 3: CLEAN UP AND ADD DATA TO PREPARE FOR ANALYSIS

Inspect the new table that has been created

colnames(all_trips)  #List of column names

## [1] "ride_id"            "rideable_type"      "started_at"        
## [4] "ended_at"           "start_station_name" "start_station_id"  
## [7] "end_station_name"   "end_station_id"     "member_casual"

nrow(all_trips)  #How many rows are in data frame?

## [1] 5032567

dim(all_trips)  #Dimensions of the data frame?

## [1] 5032567       9

head(all_trips)  #See the first 6 rows of data frame.  Also tail(all_trips)

## # A tibble: 6 × 9
##   ride_id        ridea…¹ started_at          ended_at            start…² start…³
##   <chr>          <chr>   <dttm>              <dttm>              <chr>   <chr>  
## 1 A50255C1E1794… classi… 2022-10-14 17:13:30 2022-10-14 17:19:39 Noble … 13290  
## 2 DB692A70BD2DD… electr… 2022-10-01 16:29:26 2022-10-01 16:49:06 Damen … 13288  
## 3 3C02727AAF60F… electr… 2022-10-19 18:55:40 2022-10-19 19:03:30 Hoyne … 655    
## 4 47E653FDC2D99… electr… 2022-10-31 07:52:36 2022-10-31 07:58:49 Rush S… KA1504…
## 5 8B5407BE53515… classi… 2022-10-13 18:41:03 2022-10-13 19:26:18 900 W … 13028  
## 6 A177C92E9F021… electr… 2022-10-13 15:53:27 2022-10-13 15:59:17 900 W … 13028  
## # … with 3 more variables: end_station_name <chr>, end_station_id <chr>,
## #   member_casual <chr>, and abbreviated variable names ¹rideable_type,
## #   ²start_station_name, ³start_station_id

tail(all_trips)

## # A tibble: 6 × 9
##   ride_id        ridea…¹ started_at          ended_at            start…² start…³
##   <chr>          <chr>   <dttm>              <dttm>              <chr>   <chr>  
## 1 9C80CD03B685B… electr… 2022-01-09 18:56:50 2022-01-09 19:02:50 Broadw… 13325  
## 2 8788DA3EDE8FD… electr… 2022-01-18 12:36:48 2022-01-18 12:46:19 Clinto… WL-012 
## 3 C6C3B64FDC827… electr… 2022-01-27 11:00:06 2022-01-27 11:02:40 Racine… 13155  
## 4 CA281AE7D8B06… electr… 2022-01-10 16:14:51 2022-01-10 16:20:58 Broadw… 13325  
## 5 44E3489918623… electr… 2022-01-19 13:22:11 2022-01-19 13:24:27 Racine… 13155  
## 6 E477C594A182A… electr… 2022-01-13 17:24:43 2022-01-13 17:28:14 Clinto… WL-012 
## # … with 3 more variables: end_station_name <chr>, end_station_id <chr>,
## #   member_casual <chr>, and abbreviated variable names ¹rideable_type,
## #   ²start_station_name, ³start_station_id

str(all_trips)  #See list of columns and data types (numeric, character, etc)

## tibble [5,032,567 × 9] (S3: tbl_df/tbl/data.frame)
##  $ ride_id           : chr [1:5032567] "A50255C1E17942AB" "DB692A70BD2DD4E3" "3C02727AAF60F873" "47E653FDC2D99236" ...
##  $ rideable_type     : chr [1:5032567] "classic_bike" "electric_bike" "electric_bike" "electric_bike" ...
##  $ started_at        : POSIXct[1:5032567], format: "2022-10-14 17:13:30" "2022-10-01 16:29:26" ...
##  $ ended_at          : POSIXct[1:5032567], format: "2022-10-14 17:19:39" "2022-10-01 16:49:06" ...
##  $ start_station_name: chr [1:5032567] "Noble St & Milwaukee Ave" "Damen Ave & Charleston St" "Hoyne Ave & Balmoral Ave" "Rush St & Cedar St" ...
##  $ start_station_id  : chr [1:5032567] "13290" "13288" "655" "KA1504000133" ...
##  $ end_station_name  : chr [1:5032567] "Larrabee St & Division St" "Damen Ave & Cullerton St" "Western Ave & Leland Ave" "Orleans St & Chestnut St (NEXT Apts)" ...
##  $ end_station_id    : chr [1:5032567] "KA1504000079" "13089" "TA1307000140" "620" ...
##  $ member_casual     : chr [1:5032567] "member" "casual" "member" "member" ...

summary(all_trips)  #Statistical summary of data. Mainly for numerics

##    ride_id          rideable_type        started_at                   
##  Length:5032567     Length:5032567     Min.   :2022-01-01 00:00:05.0  
##  Class :character   Class :character   1st Qu.:2022-05-26 22:46:23.0  
##  Mode  :character   Mode  :character   Median :2022-07-14 15:02:45.0  
##                                        Mean   :2022-07-10 18:41:37.5  
##                                        3rd Qu.:2022-09-01 07:25:58.5  
##                                        Max.   :2022-10-31 23:59:33.0  
##     ended_at                      start_station_name start_station_id  
##  Min.   :2022-01-01 00:01:48.00   Length:5032567     Length:5032567    
##  1st Qu.:2022-05-26 23:01:21.00   Class :character   Class :character  
##  Median :2022-07-14 15:23:31.00   Mode  :character   Mode  :character  
##  Mean   :2022-07-10 19:01:45.52                                        
##  3rd Qu.:2022-09-01 07:39:32.00                                        
##  Max.   :2022-11-07 04:53:58.00                                        
##  end_station_name   end_station_id     member_casual     
##  Length:5032567     Length:5032567     Length:5032567    
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##

Checked to make sure the proper number of observations were reassigned

table(all_trips$member_casual)

## 
##  casual  member 
## 2154950 2877617

Added columns that list the date, month, day, and year of each ride

This will allow us to aggregate ride data for each month, day, or year … before completing these operations we could only aggregate at the ride level

https://www.statmethods.net/input/dates.html more on date formats in R found at that link

all_trips$date <- as.Date(all_trips$started_at) #The default format is yyyy-mm-dd
all_trips$month <- format(as.Date(all_trips$date), "%m")
all_trips$day <- format(as.Date(all_trips$date), "%d")
all_trips$year <- format(as.Date(all_trips$date), "%Y")
all_trips$day_of_week <- format(as.Date(all_trips$date), "%A")

Added a “ride_length” calculation to all_trips (in seconds)

https://stat.ethz.ch/R-manual/R-devel/library/base/html/difftime.html

all_trips$ride_length <- difftime(all_trips$ended_at,all_trips$started_at)

Inspect the structure of the columns

str(all_trips)

## tibble [5,032,567 × 15] (S3: tbl_df/tbl/data.frame)
##  $ ride_id           : chr [1:5032567] "A50255C1E17942AB" "DB692A70BD2DD4E3" "3C02727AAF60F873" "47E653FDC2D99236" ...
##  $ rideable_type     : chr [1:5032567] "classic_bike" "electric_bike" "electric_bike" "electric_bike" ...
##  $ started_at        : POSIXct[1:5032567], format: "2022-10-14 17:13:30" "2022-10-01 16:29:26" ...
##  $ ended_at          : POSIXct[1:5032567], format: "2022-10-14 17:19:39" "2022-10-01 16:49:06" ...
##  $ start_station_name: chr [1:5032567] "Noble St & Milwaukee Ave" "Damen Ave & Charleston St" "Hoyne Ave & Balmoral Ave" "Rush St & Cedar St" ...
##  $ start_station_id  : chr [1:5032567] "13290" "13288" "655" "KA1504000133" ...
##  $ end_station_name  : chr [1:5032567] "Larrabee St & Division St" "Damen Ave & Cullerton St" "Western Ave & Leland Ave" "Orleans St & Chestnut St (NEXT Apts)" ...
##  $ end_station_id    : chr [1:5032567] "KA1504000079" "13089" "TA1307000140" "620" ...
##  $ member_casual     : chr [1:5032567] "member" "casual" "member" "member" ...
##  $ date              : Date[1:5032567], format: "2022-10-14" "2022-10-01" ...
##  $ month             : chr [1:5032567] "10" "10" "10" "10" ...
##  $ day               : chr [1:5032567] "14" "01" "19" "31" ...
##  $ year              : chr [1:5032567] "2022" "2022" "2022" "2022" ...
##  $ day_of_week       : chr [1:5032567] "Friday" "Saturday" "Wednesday" "Monday" ...
##  $ ride_length       : 'difftime' num [1:5032567] 369 1180 470 373 ...
##   ..- attr(*, "units")= chr "secs"

Convert “ride_length” from Factor to numeric so we can run calculations on the data is.factor(all_trips$ride_length)

all_trips$ride_length <- as.numeric(as.character(all_trips$ride_length))
is.numeric(all_trips$ride_length)

## [1] TRUE

Removed “bad” data

The dataframe includes a few hundred entries when bikes were taken out of docks and checked for quality by Divvy or ride_length was negative

We will create a new version of the dataframe (v2) since data is being removed

https://www.datasciencemadesimple.com/delete-or-drop-rows-in-r-with-conditions-2/

all_trips_v2 <- all_trips[!(all_trips$start_station_name == "HQ QR" | all_trips$ride_length<0),]

STEP 4: CONDUCT DESCRIPTIVE ANALYSIS

Descriptive analysis on ride_length (all figures in seconds)

mean(all_trips_v2$ride_length) #straight average (total ride length / rides)

## [1] NA

median(all_trips_v2$ride_length) #midpoint number in the ascending array of ride lengths

## [1] NA

max(all_trips_v2$ride_length) #longest ride

## [1] NA

min(all_trips_v2$ride_length) #shortest ride

## [1] NA

I had condensed the four lines above to one line using summary() on the specific attribute

summary(all_trips_v2$ride_length)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       0     371     653    1277    1176 2483235  733225

Compared members and casual users

aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual, FUN = mean)

##   all_trips_v2$member_casual all_trips_v2$ride_length
## 1                     casual                 1932.152
## 2                     member                  787.723

aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual, FUN = median)

##   all_trips_v2$member_casual all_trips_v2$ride_length
## 1                     casual                      837
## 2                     member                      551

aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual, FUN = max)

##   all_trips_v2$member_casual all_trips_v2$ride_length
## 1                     casual                  2483235
## 2                     member                    93594

aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual, FUN = min)

##   all_trips_v2$member_casual all_trips_v2$ride_length
## 1                     casual                        0
## 2                     member                        0

See the average ride time by each day for members vs casual users

aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual + all_trips_v2$day_of_week, FUN = mean)

##    all_trips_v2$member_casual all_trips_v2$day_of_week all_trips_v2$ride_length
## 1                      casual                   Friday                1863.6220
## 2                      member                   Friday                 770.7869
## 3                      casual                   Monday                1948.2110
## 4                      member                   Monday                 759.6822
## 5                      casual                 Saturday                2142.9944
## 6                      member                 Saturday                 883.3955
## 7                      casual                   Sunday                2219.5627
## 8                      member                   Sunday                 874.1955
## 9                      casual                 Thursday                1678.0602
## 10                     member                 Thursday                 756.6890
## 11                     casual                  Tuesday                1737.7837
## 12                     member                  Tuesday                 750.2949
## 13                     casual                Wednesday                1652.1514
## 14                     member                Wednesday                 747.5031

Notice that the days of the week are out of order. Let’s fix that

all_trips_v2$day_of_week <- ordered(all_trips_v2$day_of_week, levels=c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"))

Now, let’s run the average ride time by each day for members vs casual users

aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual + all_trips_v2$day_of_week, FUN = mean)

##    all_trips_v2$member_casual all_trips_v2$day_of_week all_trips_v2$ride_length
## 1                      casual                   Sunday                2219.5627
## 2                      member                   Sunday                 874.1955
## 3                      casual                   Monday                1948.2110
## 4                      member                   Monday                 759.6822
## 5                      casual                  Tuesday                1737.7837
## 6                      member                  Tuesday                 750.2949
## 7                      casual                Wednesday                1652.1514
## 8                      member                Wednesday                 747.5031
## 9                      casual                 Thursday                1678.0602
## 10                     member                 Thursday                 756.6890
## 11                     casual                   Friday                1863.6220
## 12                     member                   Friday                 770.7869
## 13                     casual                 Saturday                2142.9944
## 14                     member                 Saturday                 883.3955

analyze ridership data by type and weekday

all_trips_v2 %>% 
  mutate(weekday = wday(started_at, label = TRUE)) %>%  #creates weekday field using wday()
  group_by(member_casual, weekday) %>%  #groups by usertype and weekday
  summarise(number_of_rides = n()                           #calculates the number of rides and average duration 
  ,average_duration = mean(ride_length)) %>%        # calculates the average duration
  arrange(member_casual, weekday)

## `summarise()` has grouped output by 'member_casual'. You can override using the
## `.groups` argument.

## # A tibble: 15 × 4
## # Groups:   member_casual [3]
##    member_casual weekday number_of_rides average_duration
##    <chr>         <ord>             <int>            <dbl>
##  1 casual        Sun              316093            2220.
##  2 casual        Mon              220388            1948.
##  3 casual        Tue              202664            1738.
##  4 casual        Wed              209858            1652.
##  5 casual        Thu              237255            1678.
##  6 casual        Fri              261776            1864.
##  7 casual        Sat              388860            2143.
##  8 member        Sun              289515             874.
##  9 member        Mon              348050             760.
## 10 member        Tue              375587             750.
## 11 member        Wed              381003             748.
## 12 member        Thu              389414             757.
## 13 member        Fri              344143             771.
## 14 member        Sat              334677             883.
## 15 <NA>          <NA>             733225              NA

Let’s visualize the number of rides by rider type

all_trips_v2 %>% 
  mutate(weekday = wday(started_at, label = TRUE)) %>% 
  group_by(member_casual, weekday) %>% 
  summarise(number_of_rides = n()
            ,average_duration = mean(ride_length)) %>% 
  arrange(member_casual, weekday)  %>% 
  ggplot(aes(x = weekday, y = number_of_rides, fill = member_casual)) +
  geom_col(position = "dodge")

## `summarise()` has grouped output by 'member_casual'. You can override using the
## `.groups` argument.

Let’s create a visualization for average duration

all_trips_v2 %>% 
  mutate(weekday = wday(started_at, label = TRUE)) %>% 
  group_by(member_casual, weekday) %>% 
  summarise(number_of_rides = n()
            ,average_duration = mean(ride_length)) %>% 
  arrange(member_casual, weekday)  %>% 
  ggplot(aes(x = weekday, y = average_duration, fill = member_casual)) +
  geom_col(position = "dodge")

## `summarise()` has grouped output by 'member_casual'. You can override using the
## `.groups` argument.

## Warning: Removed 1 rows containing missing values (`geom_col()`).

STEP 5: EXPORT SUMMARY FILE FOR FURTHER ANALYSIS

Created a csv file that we will visualize in Excel, Tableau, or my presentation software

N.B.: This file location is for a Mac. If you are working on a PC, change the file location accordingly (most likely “C:_USERNAME...”) to export the data. You can read more here: https://datatofish.com/export-dataframe-to-csv-in-r/

counts <- aggregate(all_trips_v2$ride_length ~ all_trips_v2$member_casual + all_trips_v2$day_of_week, FUN = mean)
write.csv(counts, file = '~/Desktop/q1_2020/avg_ride_length.csv')

Case Study 1: How does a bike-share navigate speedy success?

Khalid MD Ashik

2022-11-30

# # # # # # # # # # # # # # # # # # # # #

Installed required packages

tidyverse for data import and wrangling

lubridate for date functions

ggplot for visualization

# # # # # # # # # # # # # # # # # # # # #

STEP 1: COLLECTED DATA

Uploaded Divvy datasets for the year of 2022 (csv files) here

STEP 2: WRANGLE DATA AND COMBINE INTO A SINGLE FILE

Compared column names each of the files

While the names don’t have to be in the same order, they DO need to match perfectly before we can use a command to join them into one file

Inspected the dataframes and look for incongruencies

Converted ride_id and rideable_type to character so that they can stack correctly

Stack individual quarter’s data frames into one big data frame

Removed lat, long data was dropped beginning in 2022

STEP 3: CLEAN UP AND ADD DATA TO PREPARE FOR ANALYSIS

Inspect the new table that has been created

Checked to make sure the proper number of observations were reassigned

Added columns that list the date, month, day, and year of each ride

This will allow us to aggregate ride data for each month, day, or year … before completing these operations we could only aggregate at the ride level

https://www.statmethods.net/input/dates.html more on date formats in R found at that link

Added a “ride_length” calculation to all_trips (in seconds)

https://stat.ethz.ch/R-manual/R-devel/library/base/html/difftime.html

Inspect the structure of the columns

Convert “ride_length” from Factor to numeric so we can run calculations on the data is.factor(all_trips$ride_length)

Removed “bad” data

The dataframe includes a few hundred entries when bikes were taken out of docks and checked for quality by Divvy or ride_length was negative

We will create a new version of the dataframe (v2) since data is being removed

https://www.datasciencemadesimple.com/delete-or-drop-rows-in-r-with-conditions-2/

STEP 4: CONDUCT DESCRIPTIVE ANALYSIS

Descriptive analysis on ride_length (all figures in seconds)

I had condensed the four lines above to one line using summary() on the specific attribute

Compared members and casual users

See the average ride time by each day for members vs casual users

Notice that the days of the week are out of order. Let’s fix that

Now, let’s run the average ride time by each day for members vs casual users

analyze ridership data by type and weekday

Let’s visualize the number of rides by rider type

Let’s create a visualization for average duration

STEP 5: EXPORT SUMMARY FILE FOR FURTHER ANALYSIS

Created a csv file that we will visualize in Excel, Tableau, or my presentation software

N.B.: This file location is for a Mac. If you are working on a PC, change the file location accordingly (most likely “C:_USERNAME...”) to export the data. You can read more here: https://datatofish.com/export-dataframe-to-csv-in-r/