#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 25 23:31:08 2018

@author: megan

Estimate the mean number of passengers per trip from one day of the 2016
Green Taxi Trip Dataset and report a 95% confidence interval for that mean.
The dataset is treated as a sample of the underlying population of trips.
"""

import pandas as pd
import scipy.stats as st  # statistical library


def mean_confidence_interval(values, confidence=0.95):
    """Return the sample mean and a t-based confidence interval for the mean.

    Args:
        values: Numeric observations (anything ``pd.Series`` accepts).
            Missing values (NaN) are dropped before any computation.
        confidence: Confidence level of the interval, e.g. 0.95 for 95%.

    Returns:
        A tuple ``(sample_mean, (lower, upper))``.

    Raises:
        ValueError: If fewer than two non-missing observations remain,
            because the sample standard deviation is undefined then.
    """
    # Drop missing values up front so the count used for the degrees of
    # freedom matches the rows that mean() and std() actually use.
    observations = pd.Series(values).dropna()
    num_data = len(observations)
    if num_data < 2:
        raise ValueError(
            "need at least two non-missing observations to compute a "
            "confidence interval"
        )

    sample_mean = observations.mean()
    # pandas std() uses ddof=1, i.e. the sample standard deviation.
    sample_std_dev = observations.std()

    # The interval for the *mean* is scaled by the standard error of the
    # mean (std / sqrt(n)), not by the standard deviation of the data.
    std_error = sample_std_dev / num_data ** 0.5
    if std_error == 0:
        # All observations are identical; the t distribution needs a
        # positive scale, so report the degenerate interval directly.
        return sample_mean, (sample_mean, sample_mean)

    # Arguments to st.t.interval:
    #   1. the confidence level,
    #   2. the degrees of freedom (number of data points - 1),
    #   3. loc: the sample mean,
    #   4. scale: the standard error of the mean.
    ci = st.t.interval(confidence, num_data - 1,
                       loc=sample_mean, scale=std_error)
    return sample_mean, ci


if __name__ == "__main__":
    # Read in one day of the 2016 Green Taxi Trip Dataset
    taxi = pd.read_csv("2016_Green_Taxi_Trip_Data.csv")

    # Compute the mean number of passengers per trip and its 95%
    # confidence interval.
    sample_mean, ci = mean_confidence_interval(taxi['Passenger_count'], 0.95)

    # Print out the sample mean and its 95% confidence interval.
    print("Mean passengers:", sample_mean)
    print("Confidence interval:", ci)