Calculate odds coefficients

This file is also a work in progress: a partial migration of other code.

import pandas as pd
import numpy as np
import os
from sklearn import linear_model
from sklearn.metrics import log_loss

# Raw (anonymized) inputs read by this notebook
results_file = '../raw_inputs/anon_application_results.csv'
barrons_cases = '../raw_inputs/odds_cases-barrons.csv'

# Coefficient tables written by this notebook
barrons_output_file = '../inputs/anon_barrons_coefs.csv'
schools_output_file = '../inputs/anon_schools_coefs.csv'

# Every relative path used from here on is resolved from this directory
os.chdir('../inputs')

# Load the admissions results (the file is read with the cp1252 encoding)
df = pd.read_csv(results_file, encoding='cp1252')
df.head()

# Total number of rows, then the number of rows per graduating class
print(len(df))
df['hs_class'].value_counts()

68757

2018    19446
2017    16947
2016    16566
2015    15798
Name: hs_class, dtype: int64

# The first analysis is by Barrons class: load the list of cases to run
b_case_df = pd.read_csv(barrons_cases, encoding='cp1252')
b_case_df.head()

# Number of cases, then how many cases there are for each race
print(len(b_case_df))
b_case_df['RACE'].value_counts()

30

AA     10
W/A    10
H      10
Name: RACE, dtype: int64

# Summarize the hs_class column of the case list (the loop further down
# compares this column against the string 'ALL')
b_case_df.hs_class.describe()

count       30
unique       2
top       2018
freq        20
Name: hs_class, dtype: object

# Results accumulate in a list of lists: the first row holds the column
# names and every regression that succeeds appends one more row.
output_table = [[
    'Case', 'N', 'N1', 'GPAcoef', 'ACTcoef', 'Int', 'Score', 'Loss',
    '50gpa', '50act', '50pred', 'Plus.05/.5red', 'Plus.1/1pred',
]]

# Here is a function used to do the regression analyses
def run_lregression(data):
    '''Fit a logistic regression of the outcome on GPA and ACT and
    summarize the fit.

    data is a 2-D numpy array whose first two columns are the independent
    variables (GPA, then ACT) and whose final column is the outcome (Y).

    Returns a list with, in order:
      - the sum of Y (the number of positive outcomes when Y is coded 0/1)
      - the GPA coefficient, the ACT coefficient and the intercept
      - the model score and the log loss, both on the data passed in
      - the mean GPA of the rows predicted close to 50/50
      - the ACT value that puts that GPA exactly on the 50/50 line
      - the predicted probability of the second class at that point, at
        +0.05 GPA / +0.5 ACT and at +0.1 GPA / +1 ACT

    Raises ValueError when no row is predicted close to 50/50 or when the
    ACT coefficient is exactly zero, because the sensitivity statistics
    are undefined in both situations. Errors raised by scikit-learn while
    fitting or predicting propagate unchanged.
    '''
    # The very large C is kept from the original code; in scikit-learn C is
    # the inverse of the regularization strength.
    lr = linear_model.LogisticRegression(C=10000000000, solver='newton-cg')
    X = data[:, :-1]
    Y = data[:, -1]
    lr.fit(X, Y)
    GPAcoef = lr.coef_[0][0]
    ACTcoef = lr.coef_[0][1]
    intercept = lr.intercept_[0]
    score = lr.score(X, Y)

    # Predict once and reuse the result for the loss and the 50/50 search
    preds = lr.predict_proba(X)
    loss = log_loss(Y, preds)

    # now create some sensitivity stats
    # first find the average gpa of points near 50/50
    near_even = (preds[:, 0] > 0.35) & (preds[:, 0] < 0.65)
    if not near_even.any():
        # Previously this produced a NaN mean (plus a RuntimeWarning) that
        # only failed later, inside predict_proba, with an unrelated message.
        raise ValueError('no observations predicted between 0.35 and 0.65; '
                         'cannot compute sensitivity statistics')
    avg_yes_gpa = np.mean(X[near_even, 0])

    # then calculate the ACT that corresponds to this average, i.e. solve
    # intercept + GPAcoef*gpa + ACTcoef*act = 0 for act
    if ACTcoef == 0:
        raise ValueError('ACT coefficient is zero; the 50/50 line cannot '
                         'be solved for ACT')
    avg_act_yes = (-intercept - avg_yes_gpa*GPAcoef)/ACTcoef

    # next, build a sensitivity matrix and check the predictions
    X_check = np.array([[avg_yes_gpa, avg_act_yes],
                        [avg_yes_gpa+0.05, avg_act_yes+.5],
                        [avg_yes_gpa+0.1, avg_act_yes+1]])
    pred_check = lr.predict_proba(X_check)

    return [Y.sum(), GPAcoef, ACTcoef, intercept, score, loss,
            avg_yes_gpa, avg_act_yes, pred_check[0][1], pred_check[1][1],
            pred_check[2][1]]

# We'll now iterate over each of the barrons cases for analysis.
# We skip the ones where the title begins with a #
for i, case in b_case_df.iterrows():
    if case.Title.startswith('#'):
        continue

    # Rows for this race / Barrons bucket. Unless the case asks for 'ALL'
    # years, keep only the graduating class named by the case itself
    # (this used to be hard-coded to 2018 regardless of the case).
    case_mask = (df.RACE == case.RACE) & (df.Barrons == case.Barrons)
    if case.hs_class != 'ALL':
        case_mask = case_mask & (df.hs_class == int(case.hs_class))
    this_df = df[case_mask]

    # Now get the right array to send to the analysis function:
    # ACT is expressed relative to a per-row benchmark column, ACT50 for
    # 'W/A' and ACT25 otherwise (presumably the college's ACT percentiles;
    # confirm against the data dictionary). Rows without a benchmark are
    # dropped. The explicit copy keeps the assignment below from writing
    # into a slice of df.
    act_var = 'ACT50' if case.RACE == 'W/A' else 'ACT25'
    has_benchmark = pd.notnull(this_df[act_var])
    short_df = this_df.loc[has_benchmark, ['GPA', 'ACT', 'Y', act_var]].copy()
    short_df['ACT'] = short_df['ACT'] - short_df[act_var]
    trial_data = short_df[['GPA', 'ACT', 'Y']].values

    # Now complete the regression and append the result to the output table
    if len(trial_data) <= 1:
        continue

    # Note: the N that is printed and stored is the row count before rows
    # lacking a benchmark were dropped.
    print('%s: %d' % (case.Title, len(this_df)), end='')
    try:
        reg_response = run_lregression(trial_data)
        new_row = [case.Title, len(this_df)]
        new_row.extend(reg_response)
        output_table.append(new_row)
        print('...Works!')
    except Exception:
        # Best effort: a case that cannot be fit is reported and skipped
        # (add a bare `raise` here when debugging a failing case)
        print('...No dice!')

H:Most Competitive+: 490...Works!
H:Most Competitive: 810...Works!
H:Highly Competitive: 1397...Works!
H:Very Competitive: 3218...Works!
H:Competitive: 4013...Works!
H:Less Competitive: 467...Works!
H:Noncompetitive: 276...Works!
B:Most Competitive+: 176...Works!
B:Most Competitive: 389...Works!
B:Highly Competitive: 684...Works!
B:Very Competitive: 1378...Works!
B:Competitive: 3628...Works!
B:Less Competitive: 712...Works!
B:Noncompetitive: 299...Works!
W:Most Competitive+: 241...Works!
W:Most Competitive: 330...Works!
W:Highly Competitive: 325...Works!
W:Very Competitive: 645...Works!
W:Competitive: 422...Works!
W:Less Competitive: 24...No dice!
W:Noncompetitive: 31...No dice!

c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\fromnumeric.py:2920: RuntimeWarning: Mean of empty slice.
  out=out, **kwargs)
c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)
c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\fromnumeric.py:2920: RuntimeWarning: Mean of empty slice.
  out=out, **kwargs)
c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)

# Row 0 of the table supplies the column names; the rest are the results
barrons_output = pd.DataFrame(data=output_table[1:],
                              columns=output_table[0])
barrons_output

# Save the Barrons-level coefficients without the DataFrame index
barrons_output.to_csv(barrons_output_file, index=False)

That completes the main calculation that most odds results are based upon.

Visually inspect the odds for the following things:

GPA and ACT coefficients are positive. If they're not, the odds are likely bad.
The last two columns show reasonable increases in odds from a GPA increase of 0.05/0.10 and an ACT increase of 0.5/1. These are a measure of the "spread" around the 50/50 odds line. If the numbers are too big, it means the model is too certain of the results on either side of the line. This can be corrected by doubling (or tripling) all 3 coefficients.

We're now going to calculate odds for specific colleges if we have enough results.

A few principles:

The general rule of thumb is we'll want to have at least 10 positive results and 10 negative results.
We'll relax the above to 5 and 5 if there is no ACT25 for the school--that means they're likely test optional, or, at a minimum, that our ACT25-based calculation for the Barrons calcs won't work for that school.
If we have the minimum number of results for a school for the specific race (AA, H, or W/A), we prefer to do the analysis only on the most recent year.
If we don't have enough results, we can extend to the prior years.

# Build the list of school/race cases that have enough results to model.
# The first row holds the column names; every later row is one case.
analysis_list = [['Label', 'NCES', 'RACE', 'hs_class']]
current_year = 2018
picker_df = df[['NCES', 'RACE', 'Y', 'hs_class', 'ACT25', 'type']]
for race in ['H', 'AA', 'W/A']:
    this_df = picker_df[picker_df.RACE == race]
    # unique() keeps first-appearance order, so the resulting list is
    # reproducible from run to run; dropna() guarantees that every school
    # examined below has at least one row.
    nces_vals = this_df.NCES.dropna().unique().tolist()
    for nces in nces_vals:
        nces_df = this_df[this_df.NCES == nces]
        is_yes = nces_df.Y == 1
        is_no = nces_df.Y == 0
        is_current = nces_df.hs_class == current_year

        # ty_* count results for the current year only, ay_* for all years
        ty_yes = (is_yes & is_current).sum()
        ty_no = (is_no & is_current).sum()
        ay_yes = is_yes.sum()
        ay_no = is_no.sum()

        # ACT25 must come from this school's rows. It was previously read
        # from the first row of the whole race subset, so every school was
        # judged against the same (unrelated) value.
        # Assumes ACT25 is constant within a school -- TODO confirm.
        act25 = nces_df.ACT25.iloc[0]

        label = race + ':' + str(nces)
        if ty_yes >= 10 and ty_no >= 10:
            # Enough results in the most recent year alone
            analysis_list.append([label, nces, race, current_year])
        elif (ay_yes >= 10 and ay_no >= 10) or (
                pd.isnull(act25) and ay_yes >= 5 and ay_no >= 5):
            # Otherwise pool all years; the minimum drops to 5/5 when the
            # school has no ACT25
            analysis_list.append([label, nces, race, 'ALL'])

len(analysis_list)

291

# Peek at the start of the list: the header row plus the first four cases
anl_short=analysis_list[:5]
anl_short

[['Label', 'NCES', 'RACE', 'hs_class'],
 ['H:148584', '148584', 'H', 'ALL'],
 ['H:169248', '169248', 'H', 'ALL'],
 ['H:154527', '154527', 'H', 'ALL'],
 ['H:150136', '150136', 'H', 'ALL']]

# ...and at the last five cases
analysis_list[-5:]

[['W/A:170976', '170976', 'W/A', 'ALL'],
 ['W/A:174844', '174844', 'W/A', 'ALL'],
 ['W/A:240444', '240444', 'W/A', 'ALL'],
 ['W/A:145637', '145637', 'W/A', 'ALL'],
 ['W/A:170532', '170532', 'W/A', 'ALL']]

# With the case list complete, fit one regression per school/race case.
# The table is started over: header row first, then one row per success.
output_table = [[
    'Case', 'N', 'N1', 'GPAcoef', 'ACTcoef', 'Int', 'Score', 'Loss',
    '50gpa', '50act', '50pred', 'Plus.05/.5red', 'Plus.1/1pred',
]]
for case, nces, race, hs_class in analysis_list[1:]:
    # Rows for this school and race, limited to one graduating class
    # unless the case covers 'ALL' years
    row_mask = (df.RACE == race) & (df.NCES == nces)
    if hs_class != 'ALL':
        row_mask = row_mask & (df.hs_class == hs_class)
    this_df = df[row_mask]

    # Array handed to the analysis function: two predictors, then outcome
    trial_data = this_df[['GPA', 'ACT', 'Y']].values

    # Nothing to fit with fewer than two rows
    if len(trial_data) <= 1:
        continue

    # Run the regression; a case that cannot be fit is reported and skipped
    print('%s: %d' % (case, len(this_df)), end='')
    try:
        reg_response = run_lregression(trial_data)
        new_row = [case, len(this_df)]
        new_row.extend(reg_response)
        output_table.append(new_row)
        print('...Works!')
    except Exception:
        print('...No dice!')

H:148584: 63...Works!
H:169248: 318...Works!
H:154527: 29...Works!
H:150136: 36...Works!
H:151324: 35...No dice!
H:229267: 86...No dice!

c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\fromnumeric.py:2920: RuntimeWarning: Mean of empty slice.
  out=out, **kwargs)
c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)
c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\fromnumeric.py:2920: RuntimeWarning: Mean of empty slice.
  out=out, **kwargs)
c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)
c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\fromnumeric.py:2920: RuntimeWarning: Mean of empty slice.
  out=out, **kwargs)
c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)

H:195526: 162...No dice!
H:197869: 31...Works!
H:178615: 129...Works!
H:153603: 58...Works!
H:204024: 32...Works!
H:152673: 98...Works!
H:147411: 36...No dice!
H:146719: 139...Works!

c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\fromnumeric.py:2920: RuntimeWarning: Mean of empty slice.
  out=out, **kwargs)
c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)
c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\fromnumeric.py:2920: RuntimeWarning: Mean of empty slice.
  out=out, **kwargs)
c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)

H:131496: 24...Works!
H:122612: 121...Works!
H:144281: 71...Works!
H:135726: 17...No dice!
H:144962: 119...Works!
H:117946: 35...Works!
H:213385: 199...Works!
H:153834: 150...Works!
H:153269: 38...Works!
H:143358: 118...Works!
H:149231: 257...Works!
H:144892: 291...Works!
H:152248: 14...No dice!
H:145646: 103...Works!
H:149505: 99

c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\fromnumeric.py:2920: RuntimeWarning: Mean of empty slice.
  out=out, **kwargs)
c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)

...Works!
H:148487: 55...Works!
H:149772: 247...Works!
H:164988: 134...Works!
H:147776: 131...Works!
H:153384: 66...Works!
H:120254: 70...Works!
H:154350: 46...Works!
H:145725: 94...Works!
H:168218: 14...Works!
H:138600: 24...Works!
H:144740: 434...Works!
H:202480: 158...Works!
H:147703: 208...Works!
H:179159: 87...Works!
H:212674: 35...Works!
H:147536: 275...Works!
H:209056: 42...Works!
H:169798: 28...Works!
H:211440: 25...Works!
H:128902: 31...Works!
H:146427: 77...Works!
H:221351: 40...Works!
H:150163: 49...Works!
H:202523: 69...Works!
H:178396: 68...Works!
H:143084: 101...Works!
H:215062: 34...Works!
H:179867: 128...Works!
H:219709: 43...Works!
H:143118: 148...Works!
H:174844: 179...Works!
H:149222: 342...Works!
H:190415: 40...No dice!
H:239318: 34...Works!
H:147679: 177...Works!

c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\fromnumeric.py:2920: RuntimeWarning: Mean of empty slice.
  out=out, **kwargs)
c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)
c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\fromnumeric.py:2920: RuntimeWarning: Mean of empty slice.
  out=out, **kwargs)
c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)

H:211291: 81...Works!
H:154095: 67...Works!
H:153621: 59...No dice!
H:127060: 101...Works!
H:243780: 73...Works!
H:151111: 10...No dice!
H:212009: 83...Works!
H:148627: 127...Works!

c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\fromnumeric.py:2920: RuntimeWarning: Mean of empty slice.
  out=out, **kwargs)
c:\users\mniksch\dropbox (nnocs)\documents\noblegit\venvs\legacy\lib\site-packages\numpy\core\_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)

# Row count of the results table, including its header row
len(output_table)

277

# Both tables are lists of lists whose first row carries the column names
schools_output = pd.DataFrame(data=output_table[1:],
                              columns=output_table[0])
schools_output.head()

analysis_df = pd.DataFrame(data=analysis_list[1:],
                           columns=analysis_list[0])

# Save the per-school coefficients and the list of cases that was analyzed
schools_output.to_csv(schools_output_file, index=False)
analysis_df.to_csv('anon_school_analysis_list.csv', index=False)

# Second pass over a hand-picked set of cases (chosen by flagging rows in
# the file saved previously). Every entry is [label, NCES, race, hs_class].
a_list = [
    ['H:144281', '144281', 'H', 'ALL'],
    ['H:170532', '170532', 'H', 'ALL'],
    ['H:Arrupe', 'Arrupe', 'H', 'ALL'],
    ['H:145691', '145691', 'H', 'ALL'],
    ['AA:216597', '216597', 'AA', 'ALL'],
    ['AA:178396', '178396', 'AA', 'ALL'],
    ['AA:170532', '170532', 'AA', 'ALL'],
    ['AA:Arrupe', 'Arrupe', 'AA', 'ALL'],
    ['AA:145691', '145691', 'AA', 'ALL'],
    ['AA:170082', '170082', 'AA', 'ALL'],
    ['AA:144740', '144740', 'AA', 'ALL'],
]

# Fresh results table: header row first, one row per successful regression
output_table = [[
    'Case', 'N', 'N1', 'GPAcoef', 'ACTcoef', 'Int', 'Score', 'Loss',
    '50gpa', '50act', '50pred', 'Plus.05/.5red', 'Plus.1/1pred',
]]
for case, nces, race, hs_class in a_list:
    # Pick the rows for this school and race; a specific graduating class
    # narrows the selection further, 'ALL' keeps every year
    selector = (df.RACE == race) & (df.NCES == nces)
    if hs_class != 'ALL':
        selector = selector & (df.hs_class == hs_class)
    this_df = df[selector]

    # Two predictors followed by the outcome, as the analysis function expects
    trial_data = this_df[['GPA', 'ACT', 'Y']].values

    # Fit only when there is more than one row; failures are reported and
    # the case is left out of the table
    if len(trial_data) > 1:
        print('%s: %d' % (case, len(this_df)), end='')
        try:
            reg_response = run_lregression(trial_data)
            new_row = [case, len(this_df)]
            new_row.extend(reg_response)
            output_table.append(new_row)
            print('...Works!')
        except Exception:
            print('...No dice!')

H:144281: 228...Works!
H:170532: 256...Works!
H:Arrupe: 371...Works!
H:145691: 325...Works!
AA:216597: 155...Works!
AA:178396: 204...Works!
AA:170532: 161...Works!
AA:Arrupe: 172...Works!
AA:145691: 167...Works!
AA:170082: 547...Works!
AA:144740: 310...Works!

# Results of the second pass: row 0 of the table supplies the column names
schools_output2 = pd.DataFrame(data=output_table[1:],
                               columns=output_table[0])
schools_output2

# Saved under its own name so the first-pass file is left untouched
schools_output2.to_csv('anon_schools_coefs_take_2.csv', index=False)

	NCES	hs_student_id	hs_class	type	result_code	Campus	RACE	GPA	ACT	Y	collegename	Barrons	ACT25	ACT50
0	148876	921691211	2018	Regular	Accepted	Campus1	H	2.80	13	1	St. Augustine College	Noncompetitive	NaN	NaN
1	148654	1599951305	2018	Regular	Accepted	Campus1	H	2.50	19	1	University of Illinois at Springfield	Competitive	20.0	23.0
2	148654	479129093	2018	Regular	Accepted	Campus1	H	2.70	20	1	University of Illinois at Springfield	Competitive	20.0	23.0
3	148654	1178256154	2018	Regular	Accepted	Campus1	AA	2.24	22	1	University of Illinois at Springfield	Competitive	20.0	23.0
4	148654	1903150611	2018	Regular	Accepted	Campus1	H	2.16	22	1	University of Illinois at Springfield	Competitive	20.0	23.0

	Case	N	N1	GPAcoef	ACTcoef	Int	Score	Loss	50gpa	50act	50pred	Plus.05/.5red	Plus.1/1pred
0	H:Most Competitive+	490	82.0	3.269467	0.277182	-13.661696	0.834395	0.366029	4.128475	0.590886	0.5	0.574947	0.646600
1	H:Most Competitive	810	370.0	4.001205	0.258949	-13.747247	0.716644	0.545084	3.638453	-3.131696	0.5	0.581646	0.659051
2	H:Highly Competitive	1397	756.0	3.055204	0.238978	-9.221920	0.736465	0.525613	3.284773	-3.405059	0.5	0.567645	0.632858
3	H:Very Competitive	3218	1910.0	2.218157	0.395526	-5.042510	0.851073	0.378709	2.840413	-3.180506	0.5	0.576561	0.649614
4	H:Competitive	4013	3326.0	3.266927	0.454606	-7.186940	0.890890	0.273525	2.507599	-2.211151	0.5	0.596439	0.685960
5	H:Less Competitive	467	127.0	8.701126	0.688007	-20.877240	0.942408	0.195938	2.376667	0.287152	0.5	0.685477	0.826083
6	H:Noncompetitive	276	125.0	1.834104	0.564467	-3.168692	0.858974	0.378682	2.244545	-1.679529	0.5	0.592410	0.678716
7	B:Most Competitive+	176	22.0	1.873550	0.372499	-7.266101	0.876543	0.302931	4.106923	-1.150140	0.5	0.569528	0.636419
8	B:Most Competitive	389	140.0	3.961763	0.295798	-13.546436	0.726790	0.499708	3.698269	-3.736435	0.5	0.585644	0.666406
9	B:Highly Competitive	684	364.0	2.906901	0.332415	-7.908246	0.765411	0.502075	3.299068	-5.059396	0.5	0.577264	0.650924
10	B:Very Competitive	1378	862.0	1.793500	0.513101	-3.332770	0.832465	0.399752	2.920345	-3.712463	0.5	0.585702	0.666512
11	B:Competitive	3628	2881.0	1.962268	0.457714	-3.427814	0.863822	0.319347	2.466144	-3.083632	0.5	0.581022	0.657898
12	B:Less Competitive	712	478.0	3.410053	0.383454	-7.475473	0.915596	0.226717	2.228704	-0.324744	0.5	0.589580	0.673588
13	B:Noncompetitive	299	189.0	3.166343	0.339668	-5.808775	0.866071	0.313812	1.931905	-0.907646	0.5	0.581309	0.658429
14	W:Most Competitive+	241	23.0	3.245932	0.222532	-14.352469	0.900000	0.279727	4.170000	3.671081	0.5	0.567967	0.633468
15	W:Most Competitive	330	128.0	2.488886	0.121962	-8.937293	0.647651	0.611409	3.748138	-3.209172	0.5	0.546224	0.591665
16	W:Highly Competitive	325	201.0	2.901873	0.256741	-8.228206	0.771930	0.480871	3.299529	-5.244998	0.5	0.567943	0.633423
17	W:Very Competitive	645	428.0	2.196075	0.540214	-3.800219	0.883858	0.284568	3.027842	-5.274109	0.5	0.593852	0.681315
18	W:Competitive	422	364.0	3.131017	0.480638	-5.785281	0.931373	0.209902	2.394009	-3.558606	0.5	0.597935	0.688634

	Case	N	N1	GPAcoef	ACTcoef	Int	Score	Loss	50gpa	50act	50pred	Plus.05/.5red	Plus.1/1pred
0	H:148584	63	42.0	5.209164	1.028021	-33.571156	0.825397	0.295799	2.799637	18.469840	0.5	0.684487	0.824760
1	H:169248	318	291.0	7.593017	0.456457	-25.884838	0.933962	0.166390	2.335429	17.859055	0.5	0.647457	0.771316
2	H:154527	29	24.0	4.953492	0.152787	-14.555692	0.931034	0.313899	2.360950	18.723693	0.5	0.580315	0.656590
3	H:150136	36	27.0	11.980035	0.549282	-43.549191	0.888889	0.200145	2.824525	17.679957	0.5	0.705503	0.851610
4	H:197869	31	11.0	5.584194	1.039670	-40.635701	0.838710	0.276262	3.122500	22.313863	0.5	0.689770	0.831751

	Case	N	N1	GPAcoef	ACTcoef	Int	Score	Loss	50gpa	50act	50pred	Plus.05/.5red	Plus.1/1pred
0	H:144281	228	174.0	5.514042	0.473419	-21.359345	0.903509	0.223911	2.268035	18.700768	0.5	0.625372	0.735911
1	H:170532	256	136.0	10.390712	0.371606	-44.451564	0.812500	0.393377	3.440856	23.408159	0.5	0.669370	0.803873
2	H:Arrupe	371	189.0	1.406622	0.093909	-5.325053	0.646900	0.626458	2.536979	18.704015	0.5	0.529288	0.558375
3	H:145691	325	249.0	6.913662	0.385680	-26.663059	0.870769	0.266130	2.803378	18.879492	0.5	0.631469	0.745935
4	AA:216597	155	71.0	6.062056	0.681934	-37.720571	0.838710	0.401508	3.610755	23.216287	0.5	0.655673	0.783832
5	AA:178396	204	154.0	6.682889	1.572445	-52.247808	0.946078	0.152027	3.072540	20.168823	0.5	0.754057	0.903848
6	AA:170532	161	78.0	8.119638	0.406221	-37.170041	0.863354	0.402563	3.505716	21.428974	0.5	0.647734	0.771744
7	AA:Arrupe	172	92.0	2.211974	0.041943	-5.857210	0.697674	0.577381	2.305457	18.062666	0.5	0.532845	0.565408
8	AA:145691	167	91.0	6.308057	0.384098	-23.980241	0.838323	0.317146	2.647700	18.949338	0.5	0.624209	0.733979
9	AA:170082	547	428.0	10.117137	0.703178	-41.935480	0.886654	0.247251	2.854407	18.568616	0.5	0.702127	0.847470
10	AA:144740	310	254.0	5.471804	0.583672	-26.181706	0.909677	0.204806	2.725817	19.302919	0.5	0.637707	0.755996

	Title	Barrons	RACE	hs_class
0	H:Most Competitive+	Most Competitive+	H	2018
1	H:Most Competitive	Most Competitive	H	2018
2	H:Highly Competitive	Highly Competitive	H	2018
3	H:Very Competitive	Very Competitive	H	2018
4	H:Competitive	Competitive	H	2018