Source code for torchcodec.encoders._audio_encoder

from pathlib import Path
from typing import Optional, Union

import torch
from torch import Tensor

from torchcodec import _core


[docs]class AudioEncoder:
    """An audio encoder.

    Args:
        samples (``torch.Tensor``): The samples to encode. This must be a 2D
            tensor of shape ``(num_channels, num_samples)``, or a 1D tensor in
            which case ``num_channels = 1`` is assumed. Values must be float
            values in ``[-1, 1]``.
        sample_rate (int): The sample rate of the **input** ``samples``. The
            sample rate of the encoded output can be specified using the
            encoding methods (``to_file``, etc.).
    """

    def __init__(self, samples: Tensor, *, sample_rate: int):
        torch._C._log_api_usage_once("torchcodec.encoders.AudioEncoder")
        # Some of these checks are also done in C++: it's OK, they're cheap, and
        # doing them here allows to surface them when the AudioEncoder is
        # instantiated, rather than later when the encoding methods are called.
        if not isinstance(samples, Tensor):
            raise ValueError(
                f"Expected samples to be a Tensor, got {type(samples) = }."
            )
        if samples.ndim == 1:
            # make it 2D and assume 1 channel
            samples = torch.unsqueeze(samples, 0)
        if samples.ndim != 2:
            raise ValueError(f"Expected 1D or 2D samples, got {samples.shape = }.")
        if samples.dtype != torch.float32:
            raise ValueError(f"Expected float32 samples, got {samples.dtype = }.")
        if sample_rate <= 0:
            raise ValueError(f"{sample_rate = } must be > 0.")

        self._samples = samples
        self._sample_rate = sample_rate

[docs]    def to_file(
        self,
        dest: Union[str, Path],
        *,
        bit_rate: Optional[int] = None,
        num_channels: Optional[int] = None,
        sample_rate: Optional[int] = None,
    ) -> None:
        """Encode samples into a file.

        Args:
            dest (str or ``pathlib.Path``): The path to the output file, e.g.
                ``audio.mp3``. The extension of the file determines the audio
                format and container.
            bit_rate (int, optional): The output bit rate. Encoders typically
                support a finite set of bit rate values, so ``bit_rate`` will be
                matched to one of those supported values. The default is chosen
                by FFmpeg.
            num_channels (int, optional): The number of channels of the encoded
                output samples. By default, the number of channels of the input
                ``samples`` is used.
            sample_rate (int, optional): The sample rate of the encoded output.
                By default, the sample rate of the input ``samples`` is used.
        """
        _core.encode_audio_to_file(
            samples=self._samples,
            sample_rate=self._sample_rate,
            filename=str(dest),
            bit_rate=bit_rate,
            num_channels=num_channels,
            desired_sample_rate=sample_rate,
        )

[docs]    def to_tensor(
        self,
        format: str,
        *,
        bit_rate: Optional[int] = None,
        num_channels: Optional[int] = None,
        sample_rate: Optional[int] = None,
    ) -> Tensor:
        """Encode samples into raw bytes, as a 1D uint8 Tensor.

        Args:
            format (str): The format of the encoded samples, e.g. "mp3", "wav"
                or "flac".
            bit_rate (int, optional): The output bit rate. Encoders typically
                support a finite set of bit rate values, so ``bit_rate`` will be
                matched to one of those supported values. The default is chosen
                by FFmpeg.
            num_channels (int, optional): The number of channels of the encoded
                output samples. By default, the number of channels of the input
                ``samples`` is used.
            sample_rate (int, optional): The sample rate of the encoded output.
                By default, the sample rate of the input ``samples`` is used.

        Returns:
            Tensor: The raw encoded bytes as 1D uint8 Tensor.
        """
        return _core.encode_audio_to_tensor(
            samples=self._samples,
            sample_rate=self._sample_rate,
            format=format,
            bit_rate=bit_rate,
            num_channels=num_channels,
            desired_sample_rate=sample_rate,
        )

[docs]    def to_file_like(
        self,
        file_like,
        format: str,
        *,
        bit_rate: Optional[int] = None,
        num_channels: Optional[int] = None,
        sample_rate: Optional[int] = None,
    ) -> None:
        """Encode samples into a file-like object.

        Args:
            file_like: A file-like object that supports ``write()`` and
                ``seek()`` methods, such as io.BytesIO(), an open file in binary
                write mode, etc. Methods must have the following signature:
                ``write(data: bytes) -> int`` and ``seek(offset: int, whence:
                int = 0) -> int``.
            format (str): The format of the encoded samples, e.g. "mp3", "wav"
                or "flac".
            bit_rate (int, optional): The output bit rate. Encoders typically
                support a finite set of bit rate values, so ``bit_rate`` will be
                matched to one of those supported values. The default is chosen
                by FFmpeg.
            num_channels (int, optional): The number of channels of the encoded
                output samples. By default, the number of channels of the input
                ``samples`` is used.
            sample_rate (int, optional): The sample rate of the encoded output.
                By default, the sample rate of the input ``samples`` is used.
        """
        _core.encode_audio_to_file_like(
            samples=self._samples,
            sample_rate=self._sample_rate,
            format=format,
            file_like=file_like,
            bit_rate=bit_rate,
            num_channels=num_channels,
            desired_sample_rate=sample_rate,
        )
Source code for torchcodec.encoders._audio_encoder

Docs

Tutorials

Resources