vllm.utils.serial_utils ¶
EMBED_DTYPE_TO_NUMPY_DTYPE_VIEW module-attribute ¶
# Lookup from embed-dtype name (the same five strings as EmbedDType) to the
# dtype used when the data is handled as a NumPy "view".
# NOTE(review): "bfloat16" maps to float16 and both fp8 variants map to uint8,
# i.e. not the dtypes listed in EMBED_DTYPE_TO_TORCH_DTYPE. Presumably these are
# same-width stand-ins for reinterpreting raw bytes rather than value
# conversions -- confirm against the functions that read this table.
EMBED_DTYPE_TO_NUMPY_DTYPE_VIEW = {
"float32": float32,
"float16": float16,
"bfloat16": float16,
"fp8_e4m3": uint8,
"fp8_e5m2": uint8,
}
EMBED_DTYPE_TO_TORCH_DTYPE module-attribute ¶
# Lookup from embed-dtype name (the same five strings as EmbedDType) to the
# dtype object of the same meaning. Every key has its own distinct value here,
# unlike the *_VIEW tables, where bfloat16 and the fp8 names share stand-ins.
# NOTE(review): per the constant's name the values are torch dtypes; they are
# shown unqualified here -- confirm the qualifying module in the real source.
EMBED_DTYPE_TO_TORCH_DTYPE = {
"float32": float32,
"float16": float16,
"bfloat16": bfloat16,
"fp8_e4m3": float8_e4m3fn,
"fp8_e5m2": float8_e5m2,
}
EMBED_DTYPE_TO_TORCH_DTYPE_VIEW module-attribute ¶
# Lookup from embed-dtype name (the same five strings as EmbedDType) to the
# dtype used when the data is handled as a torch "view". The key -> value
# pattern is the same as in EMBED_DTYPE_TO_NUMPY_DTYPE_VIEW.
# NOTE(review): "bfloat16" -> float16 and fp8 -> uint8 look like same-width
# reinterpretation targets, presumably so both view tables stay in step --
# confirm against the functions that read this table.
EMBED_DTYPE_TO_TORCH_DTYPE_VIEW = {
"float32": float32,
"float16": float16,
"bfloat16": float16,
"fp8_e4m3": uint8,
"fp8_e5m2": uint8,
}
EmbedDType module-attribute ¶
# Closed set of embed-dtype names accepted as `embed_dtype` arguments in this
# module. The same five strings are the keys of every EMBED_DTYPE_TO_* table,
# so adding a name here means adding an entry to each of those tables too.
EmbedDType = Literal[
"float32", "float16", "bfloat16", "fp8_e4m3", "fp8_e5m2"
]
MetadataItem dataclass ¶
Source code in vllm/utils/serial_utils.py
__init__ ¶
__init__(
index: int,
embed_dtype: EmbedDType,
endianness: Endianness,
start: int,
end: int,
shape: tuple[int, ...],
) -> None
binary2tensor ¶
binary2tensor(
binary: bytes,
shape: tuple[int, ...],
embed_dtype: EmbedDType,
endianness: Endianness,
) -> Tensor
Source code in vllm/utils/serial_utils.py
decode_pooling_output ¶
decode_pooling_output(
items: list[MetadataItem], body: bytes
) -> list[Tensor]
Source code in vllm/utils/serial_utils.py
encode_pooling_bytes ¶
encode_pooling_bytes(
pooling_outputs: list[PoolingRequestOutput],
embed_dtype: EmbedDType,
endianness: Endianness,
)
Source code in vllm/utils/serial_utils.py
encode_pooling_output ¶
encode_pooling_output(
output: PoolingRequestOutput,
encoding_format: EncodingFormat,
embed_dtype: EmbedDType,
endianness: Endianness,
) -> list[float] | str | bytes
Source code in vllm/utils/serial_utils.py
tensor2binary ¶
tensor2binary(
tensor: Tensor,
embed_dtype: EmbedDType,
endianness: Endianness,
) -> bytes