# tensorboard.py
import fastai
from fastai import *
from fastai.vision import *
from fastai.callbacks import *
from fastai.vision.gan import *
from fastai.core import *
import statistics
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
  10. class ModelImageSet():
  11. @staticmethod
  12. def get_list_from_model(learn:Learner, ds_type:DatasetType, batch:Tuple)->[]:
  13. image_sets = []
  14. x,y = batch[0],batch[1]
  15. preds = learn.pred_batch(ds_type=ds_type, batch=(x,y), reconstruct=True)
  16. for orig_px, real_px, gen in zip(x,y,preds):
  17. orig = Image(px=orig_px)
  18. real = Image(px=real_px)
  19. image_set = ModelImageSet(orig=orig, real=real, gen=gen)
  20. image_sets.append(image_set)
  21. return image_sets
  22. def __init__(self, orig:Image, real:Image, gen:Image):
  23. self.orig = orig
  24. self.real = real
  25. self.gen = gen
  26. #TODO: There aren't any callbacks using this yet. Not sure if we want this included (not sure if it's useful, honestly)
  27. class ModelGraphVisualizer():
  28. def __init__(self):
  29. return
  30. def write_model_graph_to_tensorboard(self, md:DataBunch, model:nn.Module, tbwriter:SummaryWriter):
  31. x,y = md.one_batch(ds_type=DatasetType.Valid, detach=False, denorm=False)
  32. tbwriter.add_graph(model=model, input_to_model=x)
  33. class ModelHistogramVisualizer():
  34. def __init__(self):
  35. return
  36. def write_tensorboard_histograms(self, model:nn.Module, iteration:int, tbwriter:SummaryWriter, name:str='model'):
  37. for param_name, values in model.named_parameters():
  38. tag = name + '/weights/' + param_name
  39. tbwriter.add_histogram(tag=tag, values=values, global_step=iteration)
  40. class ModelStatsVisualizer():
  41. def __init__(self):
  42. self.gradients_root = '/gradients/'
  43. def write_tensorboard_stats(self, model:nn.Module, iteration:int, tbwriter:SummaryWriter, name:str='model_stats'):
  44. gradients = [x.grad for x in model.parameters() if x.grad is not None]
  45. gradient_nps = [to_np(x.data) for x in gradients]
  46. if len(gradients) == 0: return
  47. avg_norm = sum(x.data.norm() for x in gradients)/len(gradients)
  48. tbwriter.add_scalar(
  49. tag=name + self.gradients_root + 'avg_norm', scalar_value=avg_norm, global_step=iteration)
  50. median_norm = statistics.median(x.data.norm() for x in gradients)
  51. tbwriter.add_scalar(
  52. tag=name + self.gradients_root + 'median_norm', scalar_value=median_norm, global_step=iteration)
  53. max_norm = max(x.data.norm() for x in gradients)
  54. tbwriter.add_scalar(
  55. tag=name + self.gradients_root + 'max_norm', scalar_value=max_norm, global_step=iteration)
  56. min_norm = min(x.data.norm() for x in gradients)
  57. tbwriter.add_scalar(
  58. tag=name + self.gradients_root + 'min_norm', scalar_value=min_norm, global_step=iteration)
  59. num_zeros = sum((np.asarray(x) == 0.0).sum() for x in gradient_nps)
  60. tbwriter.add_scalar(
  61. tag=name + self.gradients_root + 'num_zeros', scalar_value=num_zeros, global_step=iteration)
  62. avg_gradient = sum(x.data.mean() for x in gradients)/len(gradients)
  63. tbwriter.add_scalar(
  64. tag=name + self.gradients_root + 'avg_gradient', scalar_value=avg_gradient, global_step=iteration)
  65. median_gradient = statistics.median(x.data.median() for x in gradients)
  66. tbwriter.add_scalar(
  67. tag=name + self.gradients_root + 'median_gradient', scalar_value=median_gradient, global_step=iteration)
  68. max_gradient = max(x.data.max() for x in gradients)
  69. tbwriter.add_scalar(
  70. tag=name + self.gradients_root + 'max_gradient', scalar_value=max_gradient, global_step=iteration)
  71. min_gradient = min(x.data.min() for x in gradients)
  72. tbwriter.add_scalar(
  73. tag=name + self.gradients_root + 'min_gradient', scalar_value=min_gradient, global_step=iteration)
  74. class ImageGenVisualizer():
  75. def output_image_gen_visuals(self, learn:Learner, trn_batch:Tuple, val_batch:Tuple, iteration:int, tbwriter:SummaryWriter):
  76. self._output_visuals(learn=learn, batch=val_batch, iteration=iteration,
  77. tbwriter=tbwriter, ds_type=DatasetType.Valid)
  78. self._output_visuals(learn=learn, batch=trn_batch, iteration=iteration,
  79. tbwriter=tbwriter, ds_type=DatasetType.Train)
  80. def _output_visuals(self, learn:Learner, batch:Tuple, iteration:int, tbwriter:SummaryWriter, ds_type:DatasetType):
  81. image_sets = ModelImageSet.get_list_from_model(
  82. learn=learn, batch=batch, ds_type=ds_type)
  83. self._write_tensorboard_images(
  84. image_sets=image_sets, iteration=iteration, tbwriter=tbwriter, ds_type=ds_type)
  85. def _write_tensorboard_images(self, image_sets:[ModelImageSet], iteration:int, tbwriter:SummaryWriter, ds_type:DatasetType):
  86. orig_images = []
  87. gen_images = []
  88. real_images = []
  89. for image_set in image_sets:
  90. orig_images.append(image_set.orig.px)
  91. gen_images.append(image_set.gen.px)
  92. real_images.append(image_set.real.px)
  93. prefix = ds_type.name
  94. tbwriter.add_image(
  95. tag=prefix + ' orig images', img_tensor=vutils.make_grid(orig_images, normalize=True), global_step=iteration)
  96. tbwriter.add_image(
  97. tag=prefix + ' gen images', img_tensor=vutils.make_grid(gen_images, normalize=True), global_step=iteration)
  98. tbwriter.add_image(
  99. tag=prefix + ' real images', img_tensor=vutils.make_grid(real_images, normalize=True), global_step=iteration)
  100. #--------Below are what you actually want to use, in practice----------------#
  101. class LearnerTensorboardWriter(LearnerCallback):
  102. def __init__(self, learn:Learner, base_dir:Path, name:str, loss_iters:int=25, weight_iters:int=1000, stats_iters:int=1000):
  103. super().__init__(learn=learn)
  104. self.base_dir = base_dir
  105. self.name = name
  106. log_dir = base_dir/name
  107. self.tbwriter = SummaryWriter(log_dir=str(log_dir))
  108. self.loss_iters = loss_iters
  109. self.weight_iters = weight_iters
  110. self.stats_iters = stats_iters
  111. self.weight_vis = ModelHistogramVisualizer()
  112. self.model_vis = ModelStatsVisualizer()
  113. self.data = None
  114. self.metrics_root = '/metrics/'
  115. def _update_batches_if_needed(self):
  116. # one_batch function is extremely slow. this is an optimization
  117. update_batches = self.data is not self.learn.data
  118. if update_batches:
  119. self.data = self.learn.data
  120. self.trn_batch = self.learn.data.one_batch(
  121. ds_type=DatasetType.Train, detach=True, denorm=False, cpu=False)
  122. self.val_batch = self.learn.data.one_batch(
  123. ds_type=DatasetType.Valid, detach=True, denorm=False, cpu=False)
  124. def _write_model_stats(self, iteration:int):
  125. self.model_vis.write_tensorboard_stats(
  126. model=self.learn.model, iteration=iteration, tbwriter=self.tbwriter)
  127. def _write_training_loss(self, iteration:int, last_loss:Tensor):
  128. scalar_value = to_np(last_loss)
  129. tag = self.metrics_root + 'train_loss'
  130. self.tbwriter.add_scalar(tag=tag, scalar_value=scalar_value, global_step=iteration)
  131. def _write_weight_histograms(self, iteration:int):
  132. self.weight_vis.write_tensorboard_histograms(
  133. model=self.learn.model, iteration=iteration, tbwriter=self.tbwriter)
  134. #TODO: Relying on a specific hardcoded start_idx here isn't great. Is there a better solution?
  135. def _write_metrics(self, iteration:int, last_metrics:MetricsList, start_idx:int=2):
  136. recorder = self.learn.recorder
  137. for i, name in enumerate(recorder.names[start_idx:]):
  138. if len(last_metrics) < i+1: return
  139. scalar_value = last_metrics[i]
  140. tag = self.metrics_root + name
  141. self.tbwriter.add_scalar(tag=tag, scalar_value=scalar_value, global_step=iteration)
  142. def on_batch_end(self, last_loss:Tensor, iteration:int, **kwargs):
  143. if iteration == 0: return
  144. self._update_batches_if_needed()
  145. if iteration % self.loss_iters == 0:
  146. self._write_training_loss(iteration=iteration, last_loss=last_loss)
  147. if iteration % self.weight_iters == 0:
  148. self._write_weight_histograms(iteration=iteration)
  149. # Doing stuff here that requires gradient info, because they get zeroed out afterwards in training loop
  150. def on_backward_end(self, iteration:int, **kwargs):
  151. if iteration == 0: return
  152. self._update_batches_if_needed()
  153. if iteration % self.stats_iters == 0:
  154. self._write_model_stats(iteration=iteration)
  155. def on_epoch_end(self, last_metrics:MetricsList, iteration:int, **kwargs):
  156. self._write_metrics(iteration=iteration, last_metrics=last_metrics)
  157. # TODO: We're overriding almost everything here. Seems like a good idea to question that ("is a" vs "has a")
  158. class GANTensorboardWriter(LearnerTensorboardWriter):
  159. def __init__(self, learn:Learner, base_dir:Path, name:str, loss_iters:int=25, weight_iters:int=1000,
  160. stats_iters:int=1000, visual_iters:int=100):
  161. super().__init__(learn=learn, base_dir=base_dir, name=name, loss_iters=loss_iters,
  162. weight_iters=weight_iters, stats_iters=stats_iters)
  163. self.visual_iters = visual_iters
  164. self.img_gen_vis = ImageGenVisualizer()
  165. self.gen_stats_updated = True
  166. self.crit_stats_updated = True
  167. # override
  168. def _write_weight_histograms(self, iteration:int):
  169. trainer = self.learn.gan_trainer
  170. generator = trainer.generator
  171. critic = trainer.critic
  172. self.weight_vis.write_tensorboard_histograms(
  173. model=generator, iteration=iteration, tbwriter=self.tbwriter, name='generator')
  174. self.weight_vis.write_tensorboard_histograms(
  175. model=critic, iteration=iteration, tbwriter=self.tbwriter, name='critic')
  176. # override
  177. def _write_model_stats(self, iteration:int):
  178. trainer = self.learn.gan_trainer
  179. generator = trainer.generator
  180. critic = trainer.critic
  181. # Don't want to write stats when model is not iterated on and hence has zeroed out gradients
  182. gen_mode = trainer.gen_mode
  183. if gen_mode and not self.gen_stats_updated:
  184. self.model_vis.write_tensorboard_stats(
  185. model=generator, iteration=iteration, tbwriter=self.tbwriter, name='gen_model_stats')
  186. self.gen_stats_updated = True
  187. if not gen_mode and not self.crit_stats_updated:
  188. self.model_vis.write_tensorboard_stats(
  189. model=critic, iteration=iteration, tbwriter=self.tbwriter, name='crit_model_stats')
  190. self.crit_stats_updated = True
  191. # override
  192. def _write_training_loss(self, iteration:int, last_loss:Tensor):
  193. trainer = self.learn.gan_trainer
  194. recorder = trainer.recorder
  195. if len(recorder.losses) > 0:
  196. scalar_value = to_np((recorder.losses[-1:])[0])
  197. tag = self.metrics_root + 'train_loss'
  198. self.tbwriter.add_scalar(tag=tag, scalar_value=scalar_value, global_step=iteration)
  199. def _write_images(self, iteration:int):
  200. trainer = self.learn.gan_trainer
  201. #TODO: Switching gen_mode temporarily seems a bit hacky here. Certainly not a good side-effect. Is there a better way?
  202. gen_mode = trainer.gen_mode
  203. try:
  204. trainer.switch(gen_mode=True)
  205. self.img_gen_vis.output_image_gen_visuals(learn=self.learn, trn_batch=self.trn_batch, val_batch=self.val_batch,
  206. iteration=iteration, tbwriter=self.tbwriter)
  207. finally:
  208. trainer.switch(gen_mode=gen_mode)
  209. # override
  210. def on_batch_end(self, iteration:int, **kwargs):
  211. super().on_batch_end(iteration=iteration, **kwargs)
  212. if iteration == 0: return
  213. if iteration % self.visual_iters == 0:
  214. self._write_images(iteration=iteration)
  215. # override
  216. def on_backward_end(self, iteration:int, **kwargs):
  217. if iteration == 0: return
  218. self._update_batches_if_needed()
  219. #TODO: This could perhaps be implemented as queues of requests instead but that seemed like overkill.
  220. # But I'm not the biggest fan of maintaining these boolean flags either... Review pls.
  221. if iteration % self.stats_iters == 0:
  222. self.gen_stats_updated = False
  223. self.crit_stats_updated = False
  224. if not (self.gen_stats_updated and self.crit_stats_updated):
  225. self._write_model_stats(iteration=iteration)
  226. class ImageGenTensorboardWriter(LearnerTensorboardWriter):
  227. def __init__(self, learn:Learner, base_dir:Path, name:str, loss_iters:int=25, weight_iters:int=1000,
  228. stats_iters: int = 1000, visual_iters: int = 100):
  229. super().__init__(learn=learn, base_dir=base_dir, name=name, loss_iters=loss_iters, weight_iters=weight_iters,
  230. stats_iters=stats_iters)
  231. self.visual_iters = visual_iters
  232. self.img_gen_vis = ImageGenVisualizer()
  233. def _write_images(self, iteration:int):
  234. self.img_gen_vis.output_image_gen_visuals(learn=self.learn, trn_batch=self.trn_batch, val_batch=self.val_batch,
  235. iteration=iteration, tbwriter=self.tbwriter)
  236. # override
  237. def on_batch_end(self, iteration:int, **kwargs):
  238. super().on_batch_end(iteration=iteration, **kwargs)
  239. if iteration == 0: return
  240. if iteration % self.visual_iters == 0:
  241. self._write_images(iteration=iteration)