在工具提示中显示 Numpy 图像
在本教程中,你将学习如何在任何 Altair 图表中使用工具提示显示存储为 Numpy 数组的图像。
首先,我们创建一些示例图像数组,其中包含不同大小和形状(圆形和方形)的斑点(对象)。我们测量斑点的面积,以便在图表中获得用于比较的定量测量值。
import numpy as np
import pandas as pd
from scipy import ndimage as ndi
rng = np.random.default_rng([ord(c) for c in 'altair'])
n_rows = 200


def create_blobs(blob_shape, img_width=96, n_dim=2, sizes=(0.05, 0.1, 0.15)):
    """Create an image array containing one randomly placed blob.

    A single pixel at a random position is set to 1 and then smeared out
    with a Gaussian filter ('circle') or a uniform filter multiplied by
    normally distributed noise ('square').

    Parameters
    ----------
    blob_shape : str
        Either 'circle' or 'square'.
    img_width : int
        Length of every axis of the image.
    n_dim : int
        Number of image dimensions.
    sizes : sequence of float
        Candidate blob sizes as fractions of ``img_width``; one is drawn
        at random and used as the Gaussian sigma or the filter window size.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(img_width,) * n_dim`` divided by its maximum.

    Raises
    ------
    ValueError
        If ``blob_shape`` is not 'circle' or 'square'.
    """
    # Validate before touching the generator so a bad call does not
    # consume random draws and shift the data of later calls
    if blob_shape not in ('circle', 'square'):
        raise ValueError(
            f"blob_shape must be 'circle' or 'square', got {blob_shape!r}"
        )
    shape = (img_width,) * n_dim
    mask = np.zeros(shape)
    # Random seed pixel that the filters below grow into a blob
    points = (img_width * rng.random(n_dim)).astype(int)
    mask[tuple(points)] = 1
    blob_size = rng.choice(sizes) * img_width
    if blob_shape == 'circle':
        im = ndi.gaussian_filter(mask, sigma=blob_size)
    else:
        # The noise uses the full image shape so that n_dim other than 2
        # yields an array of the requested dimensionality
        noise = rng.normal(4, size=shape)
        im = ndi.uniform_filter(mask, size=blob_size, mode='constant') * noise
    return im / im.max()
# All circle images are generated first, then all square images, then the
# group labels; every draw comes from the same seeded generator
circle_images = [create_blobs('circle') for _ in range(n_rows)]
square_images = [create_blobs('square', sizes=[0.3, 0.4, 0.5]) for _ in range(n_rows)]
group_labels = rng.choice(['a', 'b', 'c'], size=n_rows)
df = pd.DataFrame({
    'image1': circle_images,
    'image2': square_images,
    'group': group_labels,
})
# The area of a blob is measured as the fraction of pixels above a threshold
for image_col in ('image1', 'image2'):
    df[f'{image_col}_area'] = df[image_col].map(lambda img: np.mean(img > 0.4))
df
image1 image2 group image1_area image2_area
0 [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... b 0.030599 0.070855
1 [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... b 0.134006 0.167752
2 [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... a 0.014865 0.195421
3 [[0.8408188559277083, 0.8434385804894682, 0.84... [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... c 0.097222 0.208550
4 [[0.0, 0.0, 0.00030006935893488147, 0.00039595... [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... b 0.072266 0.043294
.. ... ... ... ... ...
195 [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... a 0.014865 0.100260
196 [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... a 0.040148 0.070747
197 [[0.020642835524827916, 0.021450917851981367, ... [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... b 0.122830 0.135091
198 [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... a 0.064779 0.088542
199 [[0.0, 2.078157310309435e-06, 2.74224551900630... [[0.5236619475835835, 0.446206634299134, 0.398... b 0.130317 0.145942
[200 rows x 5 columns]
接下来,我们定义一个函数,将 Numpy 数组转换为 base64 编码的字符串。这是让工具提示将数据识别为图像并正确渲染所必需的步骤。
from io import BytesIO
from PIL import Image, ImageDraw
import base64
def create_tooltip_image(df_row):
    """Concatenate, rescale, and convert two images to one base64 string.

    Parameters
    ----------
    df_row : mapping
        Row with the keys 'image1' and 'image2', each holding a 2D array.
        The arrays are joined side by side, so they need the same height.

    Returns
    -------
    str
        A ``data:image/png;base64,...`` string of the combined PNG image.
    """
    image1 = df_row['image1']
    image2 = df_row['image2']
    # Concatenate images to show together in the tooltip
    # This can be skipped if only one image is to be displayed
    img_gap = np.ones([image1.shape[0], 10])  # 10 px white gap between imgs
    img_arr = np.concatenate([image1, img_gap, image2], axis=1)
    # Create a PIL image from the array.
    # Multiplying by 255 and recasting as uint8 makes the images occupy the
    # entire supported intensity space from 0-255. Values are clipped to
    # [0, 1] first because casting a negative float to uint8 gives
    # platform-dependent results (the noisy square blobs can contain
    # slightly negative pixels).
    img = Image.fromarray((255 * np.clip(img_arr, 0, 1)).astype('uint8'))
    # Optional: Burn in labels as pixels in the images.
    # Can be helpful to keep track of which image is which
    draw = ImageDraw.Draw(img)
    draw.text((3, 0), 'im1', fill=255)
    draw.text((3 + image1.shape[1] + img_gap.shape[1], 0), 'im2', fill=255)
    # Convert to base64 encoded image string that can be displayed in the tooltip
    buffered = BytesIO()
    img.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/png;base64,{img_str}"
# The tooltip only renders an image when the column that holds the
# base64 image string is called "image"
array_columns = ['image1', 'image2']
df['image'] = df[array_columns].apply(create_tooltip_image, axis=1)
# The image arrays are large and no longer needed, so they are left out
# of the data that is passed to the chart
df_plot = df.drop(columns=array_columns)
df_plot
group image1_area image2_area image
0 b 0.030599 0.070855 ...
1 b 0.134006 0.167752 ...
2 a 0.014865 0.195421 ...
3 c 0.097222 0.208550 ...
4 b 0.072266 0.043294 ...
.. ... ... ... ...
195 a 0.014865 0.100260 ...
196 a 0.040148 0.070747 ...
197 b 0.122830 0.135091 ...
198 a 0.064779 0.088542 ...
199 b 0.130317 0.145942 ...
[200 rows x 4 columns]
现在我们准备创建图表,当鼠标悬停在点上时,图表将显示图像作为工具提示。我们可以看到,正如预期的那样,大的白色斑点对应于较高的面积测量值。
import altair as alt
# The random() expression function jitters the points in the x-direction
x_jitter = alt.expr('random() * 16 - 8')
area_columns = ['image1_area', 'image2_area']
jittered_points = alt.Chart(df_plot, width=alt.Step(40)).mark_circle(xOffset=x_jitter).encode(
    x='group',
    # The y field is filled in by `repeat` below, once per area column
    y=alt.Y(alt.repeat(), type='quantitative'),
    tooltip=['image'],
    color='group',
)
jittered_points.repeat(area_columns).resolve_scale(y='shared').properties(
    title='Comparison of blob areas'
)
请注意,当将图像作为图表数据的一部分包含在内时,图表大小通常会增加数倍。上面的图表在不包含图像的情况下大小为 19 KB,添加图像后则为 760 KB。尽管大小增加到了原来的约 40 倍,但 base64 编码仍然相当节省存储空间;如果我们将图像以原始 Numpy 数组格式包含在内,图表大小将达到 35 MB!
如果我们想更有趣、更精巧一些,我们可以一次显示一个图表,并根据下拉选择器动态更新 y 轴显示的内容以及图像工具提示中显示的内容。我们首先定义一个工具提示,它只包含单个图像,而不是将两个图像连接在一起。
def create_tooltip_image(img_arr):
    """Rescale and convert a single image to a base64 string.

    Parameters
    ----------
    img_arr : numpy.ndarray
        Image array; values are expected to lie between 0 and 1.

    Returns
    -------
    str
        A ``data:image/png;base64,...`` string of the PNG image.
    """
    # Create a PIL image from the array.
    # Multiplying by 255 and recasting as uint8 makes the image occupy the
    # entire supported intensity space from 0-255. Values are clipped to
    # [0, 1] first because casting a negative float to uint8 gives
    # platform-dependent results.
    img = Image.fromarray((255 * np.clip(img_arr, 0, 1)).astype('uint8'))
    # Convert to base64 encoded image string that can be displayed in the tooltip
    buffered = BytesIO()
    img.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/png;base64,{img_str}"
# Each image is encoded into its own base64 column this time; the chart
# further down copies the selected one into a field called `image`,
# which is the name that triggers the image rendering in the tooltip
for image_col in ('image1', 'image2'):
    df[f'{image_col}_base64'] = df[image_col].map(create_tooltip_image)
# The image arrays are large and no longer needed, so they are dropped;
# the previous combined tooltip image is dropped as well for clarity
df_plot = df.drop(columns=['image1', 'image2', 'image'])
df_plot
group image1_area image2_area image1_base64 image2_base64
0 b 0.030599 0.070855 ... ...
1 b 0.134006 0.167752 ... ...
2 a 0.014865 0.195421 ... ...
3 c 0.097222 0.208550 ... ...
4 b 0.072266 0.043294 ... ...
.. ... ... ... ... ...
195 a 0.014865 0.100260 ... ...
196 a 0.040148 0.070747 ... ...
197 b 0.122830 0.135091 ... ...
198 a 0.064779 0.088542 ... ...
199 b 0.130317 0.145942 ... ...
[200 rows x 5 columns]
在我们的图表中,我们需要使用一个转换(transform)来根据下拉菜单中的选择动态更新 y 轴列和工具提示列。代码中的注释更详细地解释了此图表规范中的每一行做什么。
# Dropdown listing the area columns; the first one is preselected
metric_options = ['image1_area', 'image2_area']
metric_dropdown = alt.binding_select(options=metric_options, name='Image metric ')
metric_param = alt.param(value=metric_options[0], bind=metric_dropdown)
# This text mark acts as the axis title. It is only needed because
# Vega-Lite does not yet support passing an expression directly to the axis title
axis_title = alt.Chart().mark_text(angle=270, dx=-150, fontWeight='bold').encode(
    alt.TextValue(alt.expr(f'{metric_param.name}'))
)
blob_points = (
    alt.Chart(df_plot, width=alt.Step(40))
    .mark_circle(xOffset=alt.expr('random() * 16 - 8'))
    .encode(
        x='group',
        y=alt.Y('image_area:Q').title(''),
        tooltip=['image:N'],
        color='group',
    )
    .properties(title='Area of blobs')
    .transform_calculate(
        # `image_area` is used for the y axis and follows the column name
        # that is currently selected in the dropdown
        image_area=f'datum[{metric_param.name}]',
        # Altair needs the tooltip field to be called `image`, so its content
        # is switched dynamically with the dropdown selection. The dropdown
        # string is not an exact match for the column holding the image data,
        # so part of the name is replaced to get the corresponding
        # base64 image field
        image=f'datum[replace({metric_param.name}, "_area", "_base64")]',
    )
)
alt.hconcat(axis_title, blob_points).add_params(metric_param)