"""Explore wages from the cleaned ``pl`` data and prepare an experience table.

Steps performed at import/run time:

1. Load ``Data/clean_pl.csv`` and keep rows whose ``plc0013_h`` value is
   numeric and below 12000.
2. Plot a histogram of that column.
3. Load ``Data/biojob.dta`` and ``Data/bioedu.dta`` and restrict both to the
   person ids (``pid``) that survived the wage filter.
4. Start building ``exp_data``, a frame keyed by those person ids.
"""
import pandas as pd
import matplotlib.pyplot as plt

mypl = pd.read_csv("Data/clean_pl.csv", low_memory=False)

# --- get wages ---------------------------------------------------------
# Optional restriction to a single survey year (currently disabled):
# mypl_2019 = mypl[mypl.syear == 2019]

# Rows whose wage entry cannot be parsed as a number are dropped.
mask = pd.to_numeric(mypl["plc0013_h"], errors="coerce").isna()
# Take an explicit copy so the column assignment below writes to an
# independent frame instead of a view of the original (this avoids pandas'
# SettingWithCopyWarning and the ambiguity it warns about).
mypl = mypl.loc[~mask].copy()
mypl["plc0013_h"] = pd.to_numeric(mypl["plc0013_h"])

# Keep only values strictly below 12000 (same unit as the wage column).
mypl = mypl[mypl["plc0013_h"] < 12000]
# NOTE(review): ids may contain repeated person ids if the data holds several
# rows per person — confirm whether duplicates are intended downstream.
ids = mypl.pid

# --- plot histogram of wages -------------------------------------------
# NOTE(review): the "science" style is not a matplotlib built-in; presumably
# it is provided by the SciencePlots package, which must be installed (and,
# in recent versions, imported) for this call to succeed — verify.
plt.style.use(["science"])
fig, ax = plt.subplots(figsize=(7, 5))
ax.hist(mypl["plc0013_h"], bins=20, rwidth=0.9)
ax.tick_params("x", rotation=60)
ax.set_xticks([2000, 4000, 6000, 8000, 10000, 12000])
ax.set_xlabel("Bruttoverdienst letzten Monat [EUR]")
fig.tight_layout()
plt.show()

# --- biography data, restricted to the filtered persons ----------------
biojob = pd.read_stata("Data/biojob.dta")
bioedu = pd.read_stata("Data/bioedu.dta")

biojob = biojob[biojob.pid.isin(ids)]
bioedu = bioedu[bioedu.pid.isin(ids)]

# Quick frequency checks. As bare expressions these only display a result in
# an interactive session (REPL / notebook cell); run as a script they compute
# the counts and discard them.
biojob.fulltime.value_counts()
biojob.einstieg_artk.value_counts()
biojob.isco88.value_counts()
bioedu.gebjahr.value_counts()

# --- experience table ---------------------------------------------------
exp_data = pd.DataFrame()
exp_data["pid"] = ids
# TODO: the original script ended with the unfinished expression
# `exp_data[""]`, which raises KeyError because no column named "" exists.
# Add the intended column(s) to exp_data here.