Parsing HParams
This page demonstrates the parsing process for hparams events.
Preparing Sample Event Logs
First, let’s import some libraries and prepare the environment for our sample event logs:
>>> import os
>>> import tempfile
>>> # Define some constants
>>> N_RUNS = 3
>>> N_EVENTS = 2
>>> # Prepare temp dirs for storing event files
>>> tmpdirs = {}
Before parsing an event file, we need to generate it first. The sample event files are generated by three commonly used event log writers.
We can generate the events with PyTorch:
>>> tmpdirs['torch'] = tempfile.TemporaryDirectory()
>>> from torch.utils.tensorboard import SummaryWriter
>>> log_dir = tmpdirs['torch'].name
>>> for i in range(N_RUNS): # 3 independent runs
... hp_dict = {
... 'C': i,
... 'run_id': i,
... 'name': f'y=2x+{i}'
... }
... writer = SummaryWriter(os.path.join(log_dir, f'run{i}'))
... writer.add_hparams(hp_dict, {'metric': i}, run_name='.')
... for j in range(N_EVENTS): # 2 events
... writer.add_scalar('y=2x+C', j * 2 + i, j)
... writer.close()
and quickly check the results:
>>> from tbparse import SummaryReader
>>> SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}).scalars
step metric y=2x+C dir_name
0 0 0.0 0.0 run0
1 1 NaN 2.0 run0
2 0 1.0 1.0 run1
3 1 NaN 3.0 run1
4 0 2.0 2.0 run2
5 1 NaN 4.0 run2
>>> SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}).hparams
C name run_id dir_name
0 0.0 y=2x+0 0.0 run0
1 1.0 y=2x+1 1.0 run1
2 2.0 y=2x+2 2.0 run2
We can generate the events with TensorFlow2 / Keras:
>>> tmpdirs['tensorflow'] = tempfile.TemporaryDirectory()
>>> import tensorflow as tf
>>> from tensorboard.plugins.hparams import api as hp
>>> log_dir = tmpdirs['tensorflow'].name
>>> for i in range(N_RUNS): # 3 independent runs
... hp_dict = {
... 'C': i,
... 'run_id': i,
... 'name': f'y=2x+{i}'
... }
... writer = tf.summary.create_file_writer(os.path.join(log_dir, f'run{i}'))
... writer.set_as_default()
... assert hp.hparams(hp_dict)
... assert tf.summary.scalar('metric', i, step=0)
... for j in range(N_EVENTS): # 2 events
... assert tf.summary.scalar('y=2x+C', j * 2 + i, j)
... writer.close()
and quickly check the results:
>>> from tbparse import SummaryReader
>>> SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}).tensors
step metric y=2x+C dir_name
0 0 0.0 0.0 run0
1 1 NaN 2.0 run0
2 0 1.0 1.0 run1
3 1 NaN 3.0 run1
4 0 2.0 2.0 run2
5 1 NaN 4.0 run2
>>> SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}).hparams
C name run_id dir_name
0 0.0 y=2x+0 0.0 run0
1 1.0 y=2x+1 1.0 run1
2 2.0 y=2x+2 2.0 run2
We can generate the events with TensorboardX:
>>> tmpdirs['tensorboardX'] = tempfile.TemporaryDirectory()
>>> from tensorboardX import SummaryWriter
>>> log_dir = tmpdirs['tensorboardX'].name
>>> for i in range(N_RUNS): # 3 independent runs
... hp_dict = {
... 'C': i,
... 'run_id': i,
... 'name': f'y=2x+{i}'
... }
... writer = SummaryWriter(os.path.join(log_dir, f'run{i}'))
... event_filepath = writer.file_writer.event_writer._ev_writer._file_name
... event_filename = os.path.basename(event_filepath)
... writer.add_hparams(hp_dict, {'metric': i}, name='hp')
... for j in range(N_EVENTS): # 2 events
... writer.add_scalar('y=2x+C', j * 2 + i, j)
... writer.close()
and quickly check the results:
>>> from tbparse import SummaryReader
>>> SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}).scalars
step metric y_2x_C dir_name
0 0 NaN 0.0 run0
1 1 NaN 2.0 run0
2 0 0.0 NaN run0/hp
3 0 NaN 1.0 run1
4 1 NaN 3.0 run1
5 0 1.0 NaN run1/hp
6 0 NaN 2.0 run2
7 1 NaN 4.0 run2
8 0 2.0 NaN run2/hp
>>> SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}).hparams
C name run_id dir_name
0 0.0 y=2x+0 0.0 run0/hp
1 1.0 y=2x+1 1.0 run1/hp
2 2.0 y=2x+2 2.0 run2/hp
Note
This tutorial assumes you log only a single set of HParams under each run directory.
Parsing Event Logs
In different use cases, we will want to read the event logs in different styles.
We further show different configurations of the tbparse.SummaryReader
class.
We first store the log directory path in the log_dir
variable.
>>> log_dir = tmpdirs['torch'].name
>>> reader = SummaryReader(log_dir, extra_columns={'dir_name'}) # long format
>>> reader.hparams
tag value dir_name
0 C 0.0 run0
1 name y=2x+0 run0
2 run_id 0.0 run0
3 C 1.0 run1
4 name y=2x+1 run1
5 run_id 1.0 run1
6 C 2.0 run2
7 name y=2x+2 run2
8 run_id 2.0 run2
>>> reader = SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}) # wide format
>>> reader.hparams
C name run_id dir_name
0 0.0 y=2x+0 0.0 run0
1 1.0 y=2x+1 1.0 run1
2 2.0 y=2x+2 2.0 run2
We first store the log directory path in the log_dir
variable.
>>> log_dir = tmpdirs['tensorflow'].name
>>> reader = SummaryReader(log_dir, extra_columns={'dir_name'}) # long format
>>> reader.hparams
tag value dir_name
0 C 0.0 run0
1 name y=2x+0 run0
2 run_id 0.0 run0
3 C 1.0 run1
4 name y=2x+1 run1
5 run_id 1.0 run1
6 C 2.0 run2
7 name y=2x+2 run2
8 run_id 2.0 run2
>>> reader = SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}) # wide format
>>> reader.hparams
C name run_id dir_name
0 0.0 y=2x+0 0.0 run0
1 1.0 y=2x+1 1.0 run1
2 2.0 y=2x+2 2.0 run2
We first store the log directory path in the log_dir
variable.
>>> log_dir = tmpdirs['tensorboardX'].name
>>> reader = SummaryReader(log_dir, extra_columns={'dir_name'}) # long format
>>> reader.hparams
tag value dir_name
0 C 0.0 run0/hp
1 name y=2x+0 run0/hp
2 run_id 0.0 run0/hp
3 C 1.0 run1/hp
4 name y=2x+1 run1/hp
5 run_id 1.0 run1/hp
6 C 2.0 run2/hp
7 name y=2x+2 run2/hp
8 run_id 2.0 run2/hp
>>> reader = SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}) # wide format
>>> reader.hparams
C name run_id dir_name
0 0.0 y=2x+0 0.0 run0/hp
1 1.0 y=2x+1 1.0 run1/hp
2 2.0 y=2x+2 2.0 run2/hp
Warning
When accessing SummaryReader.hparams, the events stored in
each event file are collected internally. The best practice is to store the
returned results in a DataFrame as shown in the samples, instead of repeatedly
accessing SummaryReader.hparams.
Extra Columns
See the Extra Columns page for more details.
Filtering Events Based on HParams
TensorBoard reads data from a full directory, and organizes it into the history of a single TensorFlow execution.
Since tensorboard considers all event files under a run directory as the same run, we can filter out irrelevant events based on the HParams stored under the run directory.
In the following samples, we use the event files generated by PyTorch for simplicity. Event files generated by TensorFlow2/Keras or TensorboardX can be parsed similarly with minor modifications (escaping the special characters in tags).
>>> log_dir = tmpdirs['torch'].name
Filtering Events with a Single Criterion
Assume we want to keep events with HParams C == 0.0
.
>>> # filter long scalars with long hparams
>>> reader = SummaryReader(log_dir, extra_columns={'dir_name'}) # long format
>>> hp = reader.hparams
>>> hp
tag value dir_name
0 C 0.0 run0
1 name y=2x+0 run0
2 run_id 0.0 run0
3 C 1.0 run1
4 name y=2x+1 run1
5 run_id 1.0 run1
6 C 2.0 run2
7 name y=2x+2 run2
8 run_id 2.0 run2
>>> hp_filtered = hp[(hp['tag']=='C') & (hp['value']==0.0)]
>>> hp_filtered
tag value dir_name
0 C 0.0 run0
>>> run_names = list(hp_filtered['dir_name'])
>>> run_names
['run0']
>>> df = reader.scalars
>>> df
step tag value dir_name
0 0 metric 0.0 run0
1 0 y=2x+C 0.0 run0
2 1 y=2x+C 2.0 run0
3 0 metric 1.0 run1
4 0 y=2x+C 1.0 run1
5 1 y=2x+C 3.0 run1
6 0 metric 2.0 run2
7 0 y=2x+C 2.0 run2
8 1 y=2x+C 4.0 run2
>>> df_filtered = df[df['dir_name'].isin(run_names)]
>>> df_filtered
step tag value dir_name
0 0 metric 0.0 run0
1 0 y=2x+C 0.0 run0
2 1 y=2x+C 2.0 run0
>>> # filter wide scalars with wide hparams
>>> reader = SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}) # wide format
>>> hp = reader.hparams
>>> hp
C name run_id dir_name
0 0.0 y=2x+0 0.0 run0
1 1.0 y=2x+1 1.0 run1
2 2.0 y=2x+2 2.0 run2
>>> hp_filtered = hp[hp['C']==0.0]
>>> hp_filtered
C name run_id dir_name
0 0.0 y=2x+0 0.0 run0
>>> run_names = list(hp_filtered['dir_name'])
>>> run_names
['run0']
>>> df = reader.scalars
>>> df
step metric y=2x+C dir_name
0 0 0.0 0.0 run0
1 1 NaN 2.0 run0
2 0 1.0 1.0 run1
3 1 NaN 3.0 run1
4 0 2.0 2.0 run2
5 1 NaN 4.0 run2
>>> df_filtered = df[df['dir_name'].isin(run_names)]
>>> df_filtered
step metric y=2x+C dir_name
0 0 0.0 0.0 run0
1 1 NaN 2.0 run0
>>> # filter long scalars with wide hparams
>>> reader = SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}) # wide format
>>> hp = reader.hparams
>>> hp
C name run_id dir_name
0 0.0 y=2x+0 0.0 run0
1 1.0 y=2x+1 1.0 run1
2 2.0 y=2x+2 2.0 run2
>>> hp_filtered = hp[hp['C']==0.0]
>>> hp_filtered
C name run_id dir_name
0 0.0 y=2x+0 0.0 run0
>>> run_names = list(hp_filtered['dir_name'])
>>> run_names
['run0']
>>> reader = SummaryReader(log_dir, extra_columns={'dir_name'}) # long format
>>> df = reader.scalars
>>> df
step tag value dir_name
0 0 metric 0.0 run0
1 0 y=2x+C 0.0 run0
2 1 y=2x+C 2.0 run0
3 0 metric 1.0 run1
4 0 y=2x+C 1.0 run1
5 1 y=2x+C 3.0 run1
6 0 metric 2.0 run2
7 0 y=2x+C 2.0 run2
8 1 y=2x+C 4.0 run2
>>> df_filtered = df[df['dir_name'].isin(run_names)]
>>> df_filtered
step tag value dir_name
0 0 metric 0.0 run0
1 0 y=2x+C 0.0 run0
2 1 y=2x+C 2.0 run0
>>> # filter wide scalars with long hparams
>>> reader = SummaryReader(log_dir, extra_columns={'dir_name'}) # long format
>>> hp = reader.hparams
>>> hp
tag value dir_name
0 C 0.0 run0
1 name y=2x+0 run0
2 run_id 0.0 run0
3 C 1.0 run1
4 name y=2x+1 run1
5 run_id 1.0 run1
6 C 2.0 run2
7 name y=2x+2 run2
8 run_id 2.0 run2
>>> hp_filtered = hp[(hp['tag']=='C') & (hp['value']==0.0)]
>>> hp_filtered
tag value dir_name
0 C 0.0 run0
>>> run_names = list(hp_filtered['dir_name'])
>>> run_names
['run0']
>>> reader = SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}) # wide format
>>> df = reader.scalars
>>> df
step metric y=2x+C dir_name
0 0 0.0 0.0 run0
1 1 NaN 2.0 run0
2 0 1.0 1.0 run1
3 1 NaN 3.0 run1
4 0 2.0 2.0 run2
5 1 NaN 4.0 run2
>>> df_filtered = df[df['dir_name'].isin(run_names)]
>>> df_filtered
step metric y=2x+C dir_name
0 0 0.0 0.0 run0
1 1 NaN 2.0 run0
Filtering Events with Multiple Criteria
Assume we want to keep events with HParams C == 0.0 or name == 'y=2x+2'.
>>> # filter long scalars with long hparams
>>> reader = SummaryReader(log_dir, extra_columns={'dir_name'}) # long format
>>> hp = reader.hparams
>>> hp
tag value dir_name
0 C 0.0 run0
1 name y=2x+0 run0
2 run_id 0.0 run0
3 C 1.0 run1
4 name y=2x+1 run1
5 run_id 1.0 run1
6 C 2.0 run2
7 name y=2x+2 run2
8 run_id 2.0 run2
>>> cond1 = (hp['tag']=='C') & (hp['value']==0.0)
>>> cond2 = (hp['tag']=='name') & (hp['value']=='y=2x+2')
>>> hp_filtered = hp[cond1 | cond2]
>>> hp_filtered
tag value dir_name
0 C 0.0 run0
7 name y=2x+2 run2
>>> run_names = list(hp_filtered['dir_name'])
>>> run_names
['run0', 'run2']
>>> df = reader.scalars
>>> df
step tag value dir_name
0 0 metric 0.0 run0
1 0 y=2x+C 0.0 run0
2 1 y=2x+C 2.0 run0
3 0 metric 1.0 run1
4 0 y=2x+C 1.0 run1
5 1 y=2x+C 3.0 run1
6 0 metric 2.0 run2
7 0 y=2x+C 2.0 run2
8 1 y=2x+C 4.0 run2
>>> df_filtered = df[df['dir_name'].isin(run_names)]
>>> df_filtered
step tag value dir_name
0 0 metric 0.0 run0
1 0 y=2x+C 0.0 run0
2 1 y=2x+C 2.0 run0
6 0 metric 2.0 run2
7 0 y=2x+C 2.0 run2
8 1 y=2x+C 4.0 run2
>>> # filter wide scalars with wide hparams
>>> reader = SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}) # wide format
>>> hp = reader.hparams
>>> hp
C name run_id dir_name
0 0.0 y=2x+0 0.0 run0
1 1.0 y=2x+1 1.0 run1
2 2.0 y=2x+2 2.0 run2
>>> hp_filtered = hp[(hp['C']==0.0) | (hp['name']=='y=2x+2')]
>>> hp_filtered
C name run_id dir_name
0 0.0 y=2x+0 0.0 run0
2 2.0 y=2x+2 2.0 run2
>>> run_names = list(hp_filtered['dir_name'])
>>> run_names
['run0', 'run2']
>>> df = reader.scalars
>>> df
step metric y=2x+C dir_name
0 0 0.0 0.0 run0
1 1 NaN 2.0 run0
2 0 1.0 1.0 run1
3 1 NaN 3.0 run1
4 0 2.0 2.0 run2
5 1 NaN 4.0 run2
>>> df_filtered = df[df['dir_name'].isin(run_names)]
>>> df_filtered
step metric y=2x+C dir_name
0 0 0.0 0.0 run0
1 1 NaN 2.0 run0
4 0 2.0 2.0 run2
5 1 NaN 4.0 run2
>>> # filter long scalars with wide hparams
>>> reader = SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}) # wide format
>>> hp = reader.hparams
>>> hp
C name run_id dir_name
0 0.0 y=2x+0 0.0 run0
1 1.0 y=2x+1 1.0 run1
2 2.0 y=2x+2 2.0 run2
>>> hp_filtered = hp[(hp['C']==0.0) | (hp['name']=='y=2x+2')]
>>> hp_filtered
C name run_id dir_name
0 0.0 y=2x+0 0.0 run0
2 2.0 y=2x+2 2.0 run2
>>> run_names = list(hp_filtered['dir_name'])
>>> run_names
['run0', 'run2']
>>> reader = SummaryReader(log_dir, extra_columns={'dir_name'}) # long format
>>> df = reader.scalars
>>> df
step tag value dir_name
0 0 metric 0.0 run0
1 0 y=2x+C 0.0 run0
2 1 y=2x+C 2.0 run0
3 0 metric 1.0 run1
4 0 y=2x+C 1.0 run1
5 1 y=2x+C 3.0 run1
6 0 metric 2.0 run2
7 0 y=2x+C 2.0 run2
8 1 y=2x+C 4.0 run2
>>> df_filtered = df[df['dir_name'].isin(run_names)]
>>> df_filtered
step tag value dir_name
0 0 metric 0.0 run0
1 0 y=2x+C 0.0 run0
2 1 y=2x+C 2.0 run0
6 0 metric 2.0 run2
7 0 y=2x+C 2.0 run2
8 1 y=2x+C 4.0 run2
>>> # filter wide scalars with long hparams
>>> reader = SummaryReader(log_dir, extra_columns={'dir_name'}) # long format
>>> hp = reader.hparams
>>> hp
tag value dir_name
0 C 0.0 run0
1 name y=2x+0 run0
2 run_id 0.0 run0
3 C 1.0 run1
4 name y=2x+1 run1
5 run_id 1.0 run1
6 C 2.0 run2
7 name y=2x+2 run2
8 run_id 2.0 run2
>>> cond1 = (hp['tag']=='C') & (hp['value']==0.0)
>>> cond2 = (hp['tag']=='name') & (hp['value']=='y=2x+2')
>>> hp_filtered = hp[cond1 | cond2]
>>> hp_filtered
tag value dir_name
0 C 0.0 run0
7 name y=2x+2 run2
>>> run_names = list(hp_filtered['dir_name'])
>>> run_names
['run0', 'run2']
>>> reader = SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}) # wide format
>>> df = reader.scalars
>>> df
step metric y=2x+C dir_name
0 0 0.0 0.0 run0
1 1 NaN 2.0 run0
2 0 1.0 1.0 run1
3 1 NaN 3.0 run1
4 0 2.0 2.0 run2
5 1 NaN 4.0 run2
>>> df_filtered = df[df['dir_name'].isin(run_names)]
>>> df_filtered
step metric y=2x+C dir_name
0 0 0.0 0.0 run0
1 1 NaN 2.0 run0
4 0 2.0 2.0 run2
5 1 NaN 4.0 run2
Store Filtering Results in an Additional Column
Assume we want to store the name from the HParams in an additional column of the scalars DataFrame.
>>> # filter long scalars with long hparams
>>> reader = SummaryReader(log_dir, extra_columns={'dir_name'}) # long format
>>> hp = reader.hparams
>>> hp
tag value dir_name
0 C 0.0 run0
1 name y=2x+0 run0
2 run_id 0.0 run0
3 C 1.0 run1
4 name y=2x+1 run1
5 run_id 1.0 run1
6 C 2.0 run2
7 name y=2x+2 run2
8 run_id 2.0 run2
>>> hp_filtered = hp[hp['tag']=='name']
>>> hp_filtered.set_index('dir_name', inplace=True)
>>> run_to_name = hp_filtered.to_dict()['value']
>>> run_to_name
{'run0': 'y=2x+0', 'run1': 'y=2x+1', 'run2': 'y=2x+2'}
>>> df = reader.scalars
>>> df
step tag value dir_name
0 0 metric 0.0 run0
1 0 y=2x+C 0.0 run0
2 1 y=2x+C 2.0 run0
3 0 metric 1.0 run1
4 0 y=2x+C 1.0 run1
5 1 y=2x+C 3.0 run1
6 0 metric 2.0 run2
7 0 y=2x+C 2.0 run2
8 1 y=2x+C 4.0 run2
>>> df['hp/name'] = df['dir_name'].map(run_to_name)
>>> df
step tag value dir_name hp/name
0 0 metric 0.0 run0 y=2x+0
1 0 y=2x+C 0.0 run0 y=2x+0
2 1 y=2x+C 2.0 run0 y=2x+0
3 0 metric 1.0 run1 y=2x+1
4 0 y=2x+C 1.0 run1 y=2x+1
5 1 y=2x+C 3.0 run1 y=2x+1
6 0 metric 2.0 run2 y=2x+2
7 0 y=2x+C 2.0 run2 y=2x+2
8 1 y=2x+C 4.0 run2 y=2x+2
>>> # filter wide scalars with wide hparams
>>> reader = SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}) # wide format
>>> hp = reader.hparams
>>> hp
C name run_id dir_name
0 0.0 y=2x+0 0.0 run0
1 1.0 y=2x+1 1.0 run1
2 2.0 y=2x+2 2.0 run2
>>> hp_filtered = hp[['name', 'dir_name']]
>>> hp_filtered.set_index('dir_name', inplace=True)
>>> run_to_name = hp_filtered.to_dict()['name']
>>> run_to_name
{'run0': 'y=2x+0', 'run1': 'y=2x+1', 'run2': 'y=2x+2'}
>>> df = reader.scalars
>>> df
step metric y=2x+C dir_name
0 0 0.0 0.0 run0
1 1 NaN 2.0 run0
2 0 1.0 1.0 run1
3 1 NaN 3.0 run1
4 0 2.0 2.0 run2
5 1 NaN 4.0 run2
>>> df['hp/name'] = df['dir_name'].map(run_to_name)
>>> df
step metric y=2x+C dir_name hp/name
0 0 0.0 0.0 run0 y=2x+0
1 1 NaN 2.0 run0 y=2x+0
2 0 1.0 1.0 run1 y=2x+1
3 1 NaN 3.0 run1 y=2x+1
4 0 2.0 2.0 run2 y=2x+2
5 1 NaN 4.0 run2 y=2x+2
>>> # filter long scalars with wide hparams
>>> reader = SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}) # wide format
>>> hp = reader.hparams
>>> hp
C name run_id dir_name
0 0.0 y=2x+0 0.0 run0
1 1.0 y=2x+1 1.0 run1
2 2.0 y=2x+2 2.0 run2
>>> hp_filtered = hp[['name', 'dir_name']]
>>> hp_filtered.set_index('dir_name', inplace=True)
>>> run_to_name = hp_filtered.to_dict()['name']
>>> run_to_name
{'run0': 'y=2x+0', 'run1': 'y=2x+1', 'run2': 'y=2x+2'}
>>> reader = SummaryReader(log_dir, extra_columns={'dir_name'}) # long format
>>> df = reader.scalars
>>> df
step tag value dir_name
0 0 metric 0.0 run0
1 0 y=2x+C 0.0 run0
2 1 y=2x+C 2.0 run0
3 0 metric 1.0 run1
4 0 y=2x+C 1.0 run1
5 1 y=2x+C 3.0 run1
6 0 metric 2.0 run2
7 0 y=2x+C 2.0 run2
8 1 y=2x+C 4.0 run2
>>> df['hp/name'] = df['dir_name'].map(run_to_name)
>>> df
step tag value dir_name hp/name
0 0 metric 0.0 run0 y=2x+0
1 0 y=2x+C 0.0 run0 y=2x+0
2 1 y=2x+C 2.0 run0 y=2x+0
3 0 metric 1.0 run1 y=2x+1
4 0 y=2x+C 1.0 run1 y=2x+1
5 1 y=2x+C 3.0 run1 y=2x+1
6 0 metric 2.0 run2 y=2x+2
7 0 y=2x+C 2.0 run2 y=2x+2
8 1 y=2x+C 4.0 run2 y=2x+2
>>> # filter wide scalars with long hparams
>>> reader = SummaryReader(log_dir, extra_columns={'dir_name'}) # long format
>>> hp = reader.hparams
>>> hp
tag value dir_name
0 C 0.0 run0
1 name y=2x+0 run0
2 run_id 0.0 run0
3 C 1.0 run1
4 name y=2x+1 run1
5 run_id 1.0 run1
6 C 2.0 run2
7 name y=2x+2 run2
8 run_id 2.0 run2
>>> hp_filtered = hp[hp['tag']=='name']
>>> hp_filtered.set_index('dir_name', inplace=True)
>>> run_to_name = hp_filtered.to_dict()['value']
>>> run_to_name
{'run0': 'y=2x+0', 'run1': 'y=2x+1', 'run2': 'y=2x+2'}
>>> reader = SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}) # wide format
>>> df = reader.scalars
>>> df
step metric y=2x+C dir_name
0 0 0.0 0.0 run0
1 1 NaN 2.0 run0
2 0 1.0 1.0 run1
3 1 NaN 3.0 run1
4 0 2.0 2.0 run2
5 1 NaN 4.0 run2
>>> df['hp/name'] = df['dir_name'].map(run_to_name)
>>> df
step metric y=2x+C dir_name hp/name
0 0 0.0 0.0 run0 y=2x+0
1 1 NaN 2.0 run0 y=2x+0
2 0 1.0 1.0 run1 y=2x+1
3 1 NaN 3.0 run1 y=2x+1
4 0 2.0 2.0 run2 y=2x+2
5 1 NaN 4.0 run2 y=2x+2
Storing Hierarchical HParams
TensorBoard only allows logging bool, float, int, and str data.
For hierarchical data, we can serialize other types into a JSON string for later use.
>>> import json
>>> def flatten_dict(hp):
... d = {}
... for key in hp.keys():
... value = hp[key]
... if not isinstance(value, (bool, float, int, str)):
... value = json.dumps(value)
... d[key] = value
... return d
>>> tmpdirs['torch'] = tempfile.TemporaryDirectory()
>>> from torch.utils.tensorboard import SummaryWriter
>>> log_dir = tmpdirs['torch'].name
>>> hp_dict = {
... 'name': 'hp_name',
... 'hierarchical': {'run_id': 0}
... }
>>> writer = SummaryWriter(os.path.join(log_dir, 'run0'))
>>> writer.add_hparams(flatten_dict(hp_dict), {}, run_name='.')
>>> writer.close()
and quickly check the results:
>>> from tbparse import SummaryReader
>>> SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}).hparams
hierarchical name dir_name
0 {"run_id": 0} hp_name run0
>>> tmpdirs['tensorflow'] = tempfile.TemporaryDirectory()
>>> import tensorflow as tf
>>> from tensorboard.plugins.hparams import api as hp
>>> log_dir = tmpdirs['tensorflow'].name
>>> hp_dict = {
... 'name': 'hp_name',
... 'hierarchical': {'run_id': 0}
... }
>>> writer = tf.summary.create_file_writer(os.path.join(log_dir, 'run0'))
>>> writer.set_as_default()
>>> assert hp.hparams(flatten_dict(hp_dict))
>>> writer.close()
and quickly check the results:
>>> from tbparse import SummaryReader
>>> SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}).hparams
hierarchical name dir_name
0 {"run_id": 0} hp_name run0
>>> tmpdirs['tensorboardX'] = tempfile.TemporaryDirectory()
>>> from tensorboardX import SummaryWriter
>>> log_dir = tmpdirs['tensorboardX'].name
>>> hp_dict = {
... 'name': 'hp_name',
... 'hierarchical': {'run_id': 0}
... }
>>> writer = SummaryWriter(os.path.join(log_dir, 'run0'))
>>> writer.add_hparams(flatten_dict(hp_dict), {}, name='hp')
>>> writer.close()
and quickly check the results:
>>> from tbparse import SummaryReader
>>> SummaryReader(log_dir, pivot=True, extra_columns={'dir_name'}).hparams
hierarchical name dir_name
0 {"run_id": 0} hp_name run0/hp
Alternatively, you can serialize the entire dictionary to a JSON string and store it as Text
events; see Parsing Text.