# Read the Green Taxi trip data; the relative path is resolved against the
# current working directory.
taxi <- read.csv(file = "Green_Taxi_Trip_Data_Feb2_2016.csv")

# Trips with exactly one passenger. which() keeps only rows where the
# comparison is TRUE, so rows with a missing Passenger_count are left out.
solo_passengers_df <- taxi[which(taxi$Passenger_count == 1), , drop = FALSE]
# Sanity check: inspect the structure and the first rows of the subset.
str(solo_passengers_df)
head(solo_passengers_df)

# Trips with two or more passengers (missing counts are left out here too).
multi_passenger_df <- taxi[which(taxi$Passenger_count >= 2), , drop = FALSE]
# Sanity check: first rows of the subset.
head(multi_passenger_df)

# Two-sample t-test comparing mean Trip_distance of solo trips against
# multi-passenger trips. With its default arguments t.test() performs the
# two-sided Welch (unequal-variance) test.
t.test(
  x = solo_passengers_df$Trip_distance,
  y = multi_passenger_df$Trip_distance
)
# Add a column with the mean cost per passenger for each trip: Fare_amount
# divided by Passenger_count in that row.
# A Passenger_count of 0 would make the division return Inf (or NaN when the
# fare is also 0), and a negative count would give a meaningless negative
# share; either would silently distort later summaries of this column. Such
# rows are set to NA instead. Rows with a missing Passenger_count stay NA,
# exactly as plain division would leave them.
taxi$Cost_per_passenger <- ifelse(
  taxi$Passenger_count > 0,
  taxi$Fare_amount / taxi$Passenger_count,
  NA_real_
)
# Add a Total_fare column holding, for every row, the sum of that row's
# Fare_amount, Extra, MTA_tax, Tip_amount and Tolls_amount values.
# A missing value in any of the five columns makes that row's total NA.
taxi$Total_fare <- with(
  taxi,
  Fare_amount + Extra + MTA_tax + Tip_amount + Tolls_amount
)