"""Random generation of monthly news events and of per-period event draws."""
import os
import random
import time
from collections import Counter

import numpy
import pandas as pd


def _news_weights(pos_scores, news_type, bias_same, bias_different):
    """Return sampling probabilities for a pool of facts, or None for uniform.

    Facts whose positivity equals ``news_type`` share ``bias_same`` equally;
    all other facts share ``bias_different`` equally.  When one of the two
    groups is empty the raw weights do not sum to 1, so they are rescaled;
    when every weight is zero, None is returned so that the caller samples
    uniformly.
    """
    n_same = sum(1 for score in pos_scores if score == news_type)
    n_other = len(pos_scores) - n_same
    weights = [
        bias_same / n_same if score == news_type else bias_different / n_other
        for score in pos_scores
    ]
    total = sum(weights)
    if total <= 0:
        return None
    # Rescale only when actually needed so that pools containing both groups
    # receive exactly the weights they always did.
    if abs(total - 1.0) > 1e-9:
        weights = [w / total for w in weights]
    return weights


def create_news(Constants, news_type, news_bias):
    """Randomly assign the facts of every fact group to three months.

    For each group of ``Constants.facts`` (grouped by the ``FACTGROUP``
    column) two distinct ``FACTID`` values are drawn for month 0 and one each
    for months 1 and 2, without replacement across months.

    Args:
        Constants: object exposing ``facts`` (grouped by ``'FACTGROUP'`` and
            read through its ``FACTID`` column) and ``fact_positivity``
            (indexed by fact id).
        news_type: positivity value that should be favoured.
        news_bias: total probability mass given to facts whose positivity
            equals ``news_type``; the remaining ``1 - news_bias`` goes to all
            other facts.

    Returns:
        dict mapping each fact group to ``{month: [fact ids]}`` for months
        0, 1 and 2.

    Raises:
        ValueError: propagated from ``numpy.random.choice`` when a pool has
            fewer facts with non-zero probability than must be drawn.
    """
    all_news = {}
    # bias_same := weight for similar news ; bias_different := weight for different
    bias_same, bias_different = news_bias, 1 - news_bias
    for newscat, newscat_group in Constants.facts.groupby('FACTGROUP'):
        # all facts of this group that have not been scheduled yet
        remaining = newscat_group.FACTID.unique().tolist()
        news_monthly = {}
        for month in range(3):
            # the first month gets two events, later months one each
            num_events = 2 if month == 0 else 1
            # positivity scores of the facts still available
            pos_scores = [Constants.fact_positivity[t] for t in remaining]
            # recomputed every month because the pool shrinks after each draw
            weights = _news_weights(
                pos_scores, news_type, bias_same, bias_different
            )
            month_events = numpy.random.choice(
                remaining, num_events, p=weights, replace=False
            ).tolist()
            # remove the ones that occurred from the possible list
            for ev in month_events:
                remaining.remove(ev)
            news_monthly[month] = month_events
        all_news[newscat] = news_monthly
    return all_news


class Draws():
    """Draw three events per period from the news available to a newspaper.

    Attributes are set in ``__init__``:
        news: ``{news type: {month: [events]}}`` (the shape produced by
            ``create_news``).
        np_list: one newspaper key per period.
        poss_events: ``{period: set of events}``, filled by
            ``create_poss_events``.
        newspapers: maps a newspaper key to a ``';'``-separated string whose
            first two fields are news types.
    """

    # number of events every draw_N method returns for one period
    EVENTS_PER_PERIOD = 3

    def __init__(self, news, np_list, newspapers):
        self.news = news
        self.np_list = np_list
        self.poss_events = dict()
        self.newspapers = newspapers

    def create_poss_events(self):
        """Fill ``self.poss_events`` with the events available in each period.

        For period ``p`` the set holds every event of months ``0..p`` from
        the first two news types of the newspaper listed at position ``p`` of
        ``self.np_list``.
        """
        for month, paper in enumerate(self.np_list):
            news_types = self.newspapers[paper].split(';')
            first, second = news_types[0], news_types[1]
            # cumulative list of possible events that occurred and can occur
            pool = []
            for m in range(month + 1):
                pool.extend(self.news[first][m] + self.news[second][m])
            self.poss_events[month] = set(pool)

    def draw_helper(self, draw_number):
        """Run the ``draw_<draw_number>`` scheme over all periods.

        Args:
            draw_number: digits (int or str) selecting a ``draw_N`` method.

        Returns:
            dict mapping each period index to its list of drawn events.

        Raises:
            ValueError: if no ``draw_<draw_number>`` method exists.
        """
        draw_name = 'draw_' + str(draw_number)
        # Only digit suffixes are accepted so that e.g. 'helper' cannot
        # resolve to this method itself.
        my_draw = getattr(self, draw_name, None) if str(draw_number).isdigit() else None
        if not callable(my_draw):
            raise ValueError('unknown draw number: %r' % (draw_number,))

        # store possible events
        self.create_poss_events()

        events = dict()
        used = []
        for month, _ in enumerate(self.np_list):
            # all news available this period
            all_news = self.poss_events[month]
            # all news minus the ones already shown
            new_news = all_news.difference(used)
            # draw the events
            events[month] = my_draw(list(all_news), list(new_news), month, events, used)
            # remember what has been shown, repeats included
            used.extend(events[month])
        return events

    def draw(self, events2bedrawn, num_events, used_news):
        """Draw ``num_events`` distinct events shown fewer than twice so far.

        Args:
            events2bedrawn: candidate events.
            num_events: number of events to draw.
            used_news: every event shown so far, one entry per showing.

        Returns:
            numpy array with the drawn events.
        """
        counts = Counter(used_news)
        choices = [t for t in events2bedrawn if counts[t] < 2]
        return numpy.random.choice(choices, num_events, replace = False)

    def _draw_period(self, nnews, current_period, ev, used_news, repeat_plan):
        """Draw one period: planned repeats first, then fresh events, shuffled.

        Args:
            nnews: events not shown in any earlier period.
            current_period: index of the period being drawn.
            ev: events drawn so far, keyed by period.
            used_news: every event shown so far, one entry per showing.
            repeat_plan: ``{period: [(source_period, count), ...]}`` naming,
                for each period that repeats events, how many to re-draw from
                which earlier period.  Other periods are entirely fresh.

        Returns:
            list of ``EVENTS_PER_PERIOD`` events in random order.
        """
        event_period = []
        for source_period, count in repeat_plan.get(current_period, ()):
            event_period.extend(self.draw(ev[source_period], count, used_news).tolist())
        # fill the rest of the period with events that were never shown
        n_fresh = self.EVENTS_PER_PERIOD - len(event_period)
        event_period.extend(numpy.random.choice(nnews, n_fresh, replace = False).tolist())
        random.shuffle(event_period)
        return event_period

    # All draw_N methods share one signature so draw_helper can call any of
    # them: (anews, nnews, current_period, ev, used_news) where anews is every
    # available event (unused by the schemes below), nnews the events not yet
    # shown, ev the draws of earlier periods and used_news the showing history.

    def draw_0(self, anews, nnews, current_period, ev, used_news):
        """Three fresh events in every period (returned unshuffled)."""
        return numpy.random.choice(nnews, 3, replace = False).tolist()

    def draw_1(self, anews, nnews, current_period, ev, used_news):
        """Period 1 repeats one event of period 0; otherwise all fresh."""
        return self._draw_period(nnews, current_period, ev, used_news, {1: [(0, 1)]})

    def draw_2(self, anews, nnews, current_period, ev, used_news):
        """Period 2 repeats one event of period 1; otherwise all fresh."""
        return self._draw_period(nnews, current_period, ev, used_news, {2: [(1, 1)]})

    def draw_3(self, anews, nnews, current_period, ev, used_news):
        """Period 2 repeats one event of period 0; otherwise all fresh."""
        return self._draw_period(nnews, current_period, ev, used_news, {2: [(0, 1)]})

    def draw_4(self, anews, nnews, current_period, ev, used_news):
        """Period 1 repeats two events of period 0; otherwise all fresh."""
        return self._draw_period(nnews, current_period, ev, used_news, {1: [(0, 2)]})

    def draw_5(self, anews, nnews, current_period, ev, used_news):
        """Period 2 repeats two events of period 1; otherwise all fresh."""
        return self._draw_period(nnews, current_period, ev, used_news, {2: [(1, 2)]})

    def draw_6(self, anews, nnews, current_period, ev, used_news):
        """Period 2 repeats two events of period 0; otherwise all fresh."""
        return self._draw_period(nnews, current_period, ev, used_news, {2: [(0, 2)]})

    def draw_7(self, anews, nnews, current_period, ev, used_news):
        """Periods 1 and 2 each repeat one event of period 0."""
        return self._draw_period(
            nnews, current_period, ev, used_news, {1: [(0, 1)], 2: [(0, 1)]}
        )

    def draw_8(self, anews, nnews, current_period, ev, used_news):
        """Period 1 repeats one event of period 0, period 2 one of period 1."""
        return self._draw_period(
            nnews, current_period, ev, used_news, {1: [(0, 1)], 2: [(1, 1)]}
        )

    def draw_9(self, anews, nnews, current_period, ev, used_news):
        """Period 2 repeats one event of period 0 and one of period 1."""
        return self._draw_period(
            nnews, current_period, ev, used_news, {2: [(0, 1), (1, 1)]}
        )