def _signed_randint(limit):
    '''Return a random integer between 0 and ``limit`` inclusive, whatever the sign of ``limit``.'''
    return random.randint(0, limit) if limit >= 0 else random.randint(limit, 0)


def simulate_data(rows, path, file):
    '''Simulate EMS event data and write it to a CSV file.

    Every simulated event combines one randomly sampled row of ``df_ccs``
    with five increasing timestamps (event, page, arrived, transport,
    destination), a patient id, a licence level, an age, and first/last
    readings for SpO2, blood pressure, temperature, pain, blood glucose
    and GCS.

    Args:
        rows: Number of events to simulate in this call.
        path: Directory the CSV file is written to.
        file: Output file name, without the ``.csv`` extension.

    Side effects:
        * Appends one record per event to ``events`` and one id per event to
          ``event_ids``; both lists are defined outside this function and
          are not cleared here, so repeated calls accumulate and the
          DataFrame is built from the full lists.
        * Advances the global ``initial_event`` by a random step of 1-1000
          for every generated id.
        * Writes ``{path}/{file}.csv``.

    NOTE(review): the 'spo2' and 'gcs' modifiers are used as the lower bound
    of ``random.randint(mod, 0)`` and the 'pain' modifier as the upper bound
    of ``random.randint(0, mod)``; ``random.randint`` raises ValueError on an
    empty range, so these are presumably <= 0 and >= 0 respectively in
    ``df_ccs`` - confirm against the data.
    '''
    global initial_event

    # The sampling window is identical for every event, so compute it once.
    start_timestamp = int(datetime.strptime('1/1/2023', '%m/%d/%Y').timestamp())
    end_timestamp = int(datetime.strptime('06/1/2023', '%m/%d/%Y').timestamp())

    for _ in range(rows):
        # One random row of df_ccs; each value is unwrapped with .item() below.
        cc = df_ccs.sample().to_dict()

        # Timestamps: every stage happens a few minutes after the previous one.
        random_timestamp = random.uniform(start_timestamp, end_timestamp)
        dtime_event = datetime.fromtimestamp(random_timestamp)
        dtime_page = dtime_event + timedelta(minutes=random.randint(2, 10))
        dtime_arrived = dtime_page + timedelta(minutes=random.randint(2, 8))
        dtime_transport = dtime_arrived + timedelta(minutes=random.randint(3, 12))
        dtime_dest = dtime_transport + timedelta(minutes=random.randint(4, 10))

        # Patient id: 'id-' + seven random digits + three characters sampled
        # from LETTERS without replacement.
        random_letters = ''.join(random.sample(LETTERS, k=3))
        random_numbers = ''.join(str(random.randint(0, 9)) for _ in range(7))
        pt_id = f'id-{random_numbers}{random_letters}'

        # Descriptive fields and per-vital modifiers of the sampled row.
        mpds_code = cc['mpds_code'].item()
        mpds_name = cc['mpds_name'].item()
        impression_code = cc['impression_code'].item()
        impression_name = cc['impression_name'].item()
        spo2_mod = cc['spo2'].item()
        bp_mod = cc['bp'].item()
        temp_mod = cc['temp'].item()
        pain_mod = cc['pain'].item()
        bgl_mod = cc['bgl'].item()
        gcs_mod = cc['gcs'].item()

        lic_lvl = random.choice(['EMR', 'PCP', 'ACP'])

        # Weighted base value from age_range plus 0-9.
        age = random.choices(age_range, age_weights)[0] + random.randint(0, 9)

        # SpO2: weighted base shifted by the modifier; last reading capped at 100.
        spo2_first = random.choices(spo2_range, spo2_weights)[0] + random.randint(spo2_mod, 0)
        spo2_last = min(spo2_first + random.randint(-5, 5), 100)

        # Blood pressure: diastolic is derived from systolic (about 40 lower).
        bp_sys_first = random.randint(90, 180) + random.randint(-1 * bp_mod, bp_mod)
        bp_sys_last = bp_sys_first + random.randint(-10, 10)
        bp_dia_first = bp_sys_first - 40 + random.randint(-10, 10)
        bp_dia_last = bp_sys_last - 40 + random.randint(-10, 10)

        # Temperature: rounded base plus a whole-number offset of either sign.
        temp_first = round(random.uniform(36, 38), 1) + _signed_randint(temp_mod)
        temp_last = round(temp_first + random.uniform(-0.3, 0.3), 1)

        # Pain: both readings are kept within 0-10 (the same bounds the last
        # reading has always been clamped to).
        pain_first = random.choices(pain_range, pain_weights)[0] + random.randint(0, pain_mod)
        pain_first = min(max(pain_first, 0), 10)
        pain_last = min(max(pain_first + random.randint(-2, 2), 0), 10)

        # Blood glucose: base plus a whole-number offset of either sign.
        bgl_first = round(random.uniform(4.5, 7) + _signed_randint(bgl_mod), 1)
        bgl_last = round(bgl_first + random.uniform(-0.5, 0.5), 1)

        # GCS: both readings are kept within 3-15 (the same bounds the last
        # reading has always been clamped to).
        gcs_first = random.randint(14, 15) + random.randint(gcs_mod, 0)
        gcs_first = min(max(gcs_first, 3), 15)
        gcs_last = min(max(gcs_first + random.randint(-3, 3), 3), 15)

        events.append([
            dtime_event, dtime_page, dtime_arrived, dtime_transport, dtime_dest,
            pt_id, mpds_code, mpds_name, impression_code, impression_name, lic_lvl,
            age, spo2_first, spo2_last, bp_sys_first, bp_sys_last,
            bp_dia_first, bp_dia_last, temp_first, temp_last,
            pain_first, pain_last,
            bgl_first, bgl_last, gcs_first, gcs_last,
        ])

    columns = [
        'dtime_event', 'dtime_page', 'dtime_arrived', 'dtime_transport', 'dtime_dest',
        'pt_id', 'mpds_code', 'mpds_name', 'impression_code', 'impression_name', 'license',
        'age', 'spo2_first', 'spo2_last', 'bp_sys_first', 'bp_sys_last',
        'bp_dia_first', 'bp_dia_last', 'temp_first', 'temp_last',
        'pain_first', 'pain_last',
        'bgl_first', 'bgl_last', 'gcs_first', 'gcs_last',
    ]
    # Each entry of `events` is one record, so state the orientation instead
    # of leaving polars to infer it from the data's dimensions.
    df = pl.DataFrame(data=events, schema=columns, orient="row")

    # Event ids grow by a random step of 1-1000, continuing from initial_event.
    for _ in range(rows):
        initial_event += random.randint(1, 1000)
        event_ids.append(initial_event)

    # Sort chronologically first so the increasing ids follow event time,
    # then put the id column in front of all other columns.
    df = df.sort(["dtime_event"], descending=False)
    df = df.select([
        pl.Series(name="event_id", values=event_ids),
        pl.all(),
    ])
    df.write_csv(f'{path}/{file}.csv')