%%HTML
<h1>Speed Dating Problem</h1>
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from __future__ import division
# Load the raw survey export into a single wide DataFrame.
df = pd.read_csv("data/Speed Dating Data.csv")

# Add empty placeholder columns for the "Shared Interests" ('shar') trait of
# question types 3 and 5 at times T1, T2 and T3. Code further down reads
# '<trait><type>_<time>' for every trait/type/time combination, so these
# names must exist as columns even though they hold no data (all NaN).
# reindex() appends the missing columns without touching the existing rows,
# index or dtypes; names that already exist are not duplicated.
cols = ['shar3_1', 'shar3_2', 'shar3_3', 'shar5_1', 'shar5_2', 'shar5_3']
df = df.reindex(columns=list(df.columns) + [c for c in cols if c not in df.columns])

# Fill NA values in the 'id' column.
# Every subject (denoted by their 'iid') has a unique 'id' value, which
# denotes their number within their wave, so a missing 'id' is filled from
# the nearest earlier row belonging to the same 'iid'. Rows that precede the
# first known 'id' of their subject stay NaN (forward fill only).
df['id'] = df.groupby('iid')['id'].ffill()
# df_bio1: the columns listed below, reduced to distinct rows. Rows of df
# that agree on every one of these columns collapse into a single row.
columns = (
    ['iid', 'age', 'gender']
    + ['field', 'field_cd', 'undergra', 'mn_sat', 'tuition']
    + ['race', 'from', 'zipcode', 'income']
    + ['career', 'career_c']
)
df_bio1 = df.loc[:, columns].drop_duplicates()
# df_bio2: a second slice of df, again reduced to distinct rows. The column
# order below is the column order of the resulting frame.
columns = (
    ['iid', 'imprace', 'imprelig', 'goal', 'date', 'go_out']
    + ['sports', 'tvsports', 'exercise', 'dining', 'museums', 'art',
       'hiking', 'gaming', 'clubbing', 'reading', 'tv', 'theater',
       'movies', 'concerts', 'music', 'shopping', 'yoga']
    + ['exphappy', 'expnum']
    + ['you_call', 'them_cal', 'date_3', 'numdat_3', 'num_in_3']
    + ['satis_2', 'length', 'numdat_2']
)
df_bio2 = df.loc[:, columns].drop_duplicates()
# Question types, encoded as the digit that follows the trait name in each
# column (e.g. 'attr3_1' is trait 'attr', question type 3, time 1):
# 1 - What do you look for in the opposite sex?
# 2 - What do you think the opposite sex looks for in a date?
# 3 - How do you think you measure up?
# 4 - What do you think MOST of your fellow men/women look for in the opposite sex?
# 5 - How do you think others perceive you?
#
# df_exp1 holds the time-1 ('_1') answers for all five question types.
# Question types 3 and 5 have no 'shar' column, so it is skipped for them.
columns = ['iid'] + [
    trait + question + '_1'
    for question in '12345'
    for trait in ('attr', 'sinc', 'intel', 'fun', 'amb', 'shar')
    if not (trait == 'shar' and question in '35')
]
df_exp1 = df.loc[:, columns].drop_duplicates()
# df_dates: the per-date columns, de-duplicated and ordered by
# (iid, order, pid, partner). Columns ending in '_o' are kept next to their
# un-suffixed counterparts in the same order the original listing used.
columns = (
    ['iid', 'order', 'pid', 'partner', 'match', 'dec_o', 'dec', 'int_corr']
    + ['samerace', 'age_o', 'race_o']
    + [name + '_o' for name in ('attr', 'sinc', 'intel', 'fun', 'amb',
                                'shar', 'like', 'prob', 'met')]
    + ['like', 'prob']
    + ['attr', 'sinc', 'intel', 'fun', 'amb', 'shar']
)
df_dates = (
    df.loc[:, columns]
    .drop_duplicates()
    .sort_values(by=['iid', 'order', 'pid', 'partner'])
)
# Reshape the wide '<trait><type>_<time>' survey columns into a long table
# (df2) with one row per subject x time x question type, then inspect it.
#
# Question types:
# 1 - What do you look for in the opposite sex?
# 2 - What do you think the opposite sex looks for in a date?
# 3 - How do you think you measure up?
# 4 - What do you think MOST of your fellow men/women look for in the opposite sex?
# 5 - How do you think others perceive you?
times = range(1, 4)
types = range(1, 6)

# Take the first row of every subject once, instead of re-scanning the whole
# frame with a boolean mask for each 'iid'. Order of first appearance is
# preserved, so df2 keeps the same row order as a per-iid loop over df.
first_rows = df.drop_duplicates(subset='iid').to_dict('records')

rows = list()
for record in first_rows:
    iid = record['iid']
    for _time in times:
        t1 = str(_time)
        for _type in types:
            t2 = str(_type)
            # Source column for a trait is e.g. 'attr' + '3' + '_' + '1'.
            suffix = t2 + "_" + t1
            rows.append({
                'iid': iid,
                'gender': record['gender'],
                'race': record['race'],
                'wave': record['wave'],
                'time': _time,
                'type': _type,
                'attr': record['attr' + suffix],
                'sinc': record['sinc' + suffix],
                'intel': record['intel' + suffix],
                'fun': record['fun' + suffix],
                'amb': record['amb' + suffix],
                'shar': record['shar' + suffix]})
df2 = pd.DataFrame(rows)

# The queries below are bare expressions kept for interactive inspection;
# they must run after df2 exists (previously the first one ran before df2
# was assigned and raised NameError in a top-to-bottom run).
cols = ['amb', 'attr', 'fun', 'intel', 'shar', 'sinc']

# Subject 1, question type 1: mean of each trait over the three time points.
df2.loc[(df2['iid'] == 1) & (df2['type'] == 1), cols].mean()

# Subject 1: decision / match columns for each of their dates.
df_dates.loc[df_dates['iid'] == 1, ['iid', 'pid', 'dec', 'dec_o', 'match', 'like']]

# Subject 1: ratings for the dates where dec == 1
# (presumably the subject's own "yes" decision - confirm against the codebook).
df_dates.loc[(df_dates['iid'] == 1) & (df_dates['dec'] == 1),
             ['pid', 'like', 'attr', 'sinc', 'intel', 'fun', 'amb', 'shar']]

# Median of every trait per question type and gender, across all subjects
# and time points.
df2.groupby(['type', 'gender'])[cols].median()