diff --git a/bin/toucan-render/App.cpp b/bin/toucan-render/App.cpp
index 83f658e..e4c22e3 100644
--- a/bin/toucan-render/App.cpp
+++ b/bin/toucan-render/App.cpp
@@ -3,6 +3,7 @@
 
 #include "App.h"
 
+#include <toucanRender/FFmpegAudioWrite.h>
 #include <toucanRender/FFmpegWrite.h>
 #include <toucanRender/Read.h>
 #include <toucanRender/Util.h>
@@ -17,6 +18,7 @@ extern "C"
 
 } // extern "C"
 
+#include <cmath>
 #include <stdio.h>
 
 namespace toucan
@@ -42,7 +44,7 @@ namespace toucan
             { "444p16", OIIO::ImageSpec(0, 0, 3, OIIO::TypeDesc::BASETYPE::UINT16) }
         };
     }
-    
+
     void App::_init(
         const std::shared_ptr<ftk::Context>& context,
         std::vector<std::string>& argv)
@@ -98,6 +100,29 @@ namespace toucan
             std::vector<std::string>{ "-v" },
             "Print verbose output.");
 
+        _cmdLine.audioCodec = ftk::CmdLineValueOption<std::string>::create(
+            std::vector<std::string>{ "-acodec" },
+            "Set the audio codec.",
+            "",
+            "pcm_s16le",
+            ftk::join(ffmpeg::getAudioCodecStrings(), ", "));
+        _cmdLine.audioSampleRate = ftk::CmdLineValueOption<int>::create(
+            std::vector<std::string>{ "-arate" },
+            "Set the audio sample rate.",
+            "",
+            48000);
+        _cmdLine.audioChannelCount = ftk::CmdLineValueOption<int>::create(
+            std::vector<std::string>{ "-achannels" },
+            "Set the audio channel count.",
+            "",
+            2);
+        _cmdLine.audioFile = ftk::CmdLineValueOption<std::string>::create(
+            std::vector<std::string>{ "-afile" },
+            "Write audio to a separate file.");
+        _cmdLine.noAudio = ftk::CmdLineFlagOption::create(
+            std::vector<std::string>{ "-no_audio" },
+            "Disable audio output.");
+
         IApp::_init(
             context,
             argv,
@@ -112,7 +137,12 @@ namespace toucan
                 _cmdLine.printSize,
                 _cmdLine.raw,
                 _cmdLine.y4m,
-                _cmdLine.verbose
+                _cmdLine.verbose,
+                _cmdLine.audioCodec,
+                _cmdLine.audioSampleRate,
+                _cmdLine.audioChannelCount,
+                _cmdLine.audioFile,
+                _cmdLine.noAudio
             });
 
         if (_cmdLine.output->hasValue() && _cmdLine.output->getValue() == "-")
@@ -123,7 +153,7 @@ namespace toucan
 
     App::App()
     {}
-        
+
     App::~App()
     {
         if (_swsContext)
@@ -148,7 +178,7 @@ namespace toucan
         out->_init(context, argv);
         return out;
     }
-    
+
     void App::run()
     {
         const std::filesystem::path parentPath = std::filesystem::path(getExeName()).parent_path();
@@ -165,7 +195,7 @@ namespace toucan
         const OTIO_NS::TimeRange& timeRange = _timelineWrapper->getTimeRange();
         const OTIO_NS::RationalTime timeInc(1.0, timeRange.duration().rate());
         const int frames = timeRange.duration().value();
-        
+
         // Create the image graph.
         _graph = std::make_shared<ImageGraph>(
             _context,
@@ -195,9 +225,35 @@ namespace toucan
             return;
         }
 
+        // Audio settings.
+        const int audioSampleRate = _cmdLine.audioSampleRate->hasValue() ?
+            _cmdLine.audioSampleRate->getValue() : 48000;
+        const int audioChannelCount = _cmdLine.audioChannelCount->hasValue() ?
+            _cmdLine.audioChannelCount->getValue() : 2;
+
+        // Create the audio graph.
+        if (!_cmdLine.noAudio->found())
+        {
+            _audioGraph = std::make_shared<AudioGraph>(
+                _context,
+                inputPath.parent_path(),
+                _timelineWrapper,
+                audioSampleRate,
+                audioChannelCount);
+        }
+
         // Create the image host.
         _host = std::make_shared<ImageEffectHost>(_context, getOpenFXPluginPaths(getExeName()));
 
+        // Audio codec.
+        ffmpeg::AudioCodec audioCodec = ffmpeg::AudioCodec::PCM_S16LE;
+        if (_cmdLine.audioCodec->hasValue())
+        {
+            ffmpeg::fromString(_cmdLine.audioCodec->getValue(), audioCodec);
+        }
+
+        const bool includeAudio = _audioGraph && _audioGraph->hasAudio();
+
         // Open the movie file.
         std::shared_ptr<ffmpeg::Write> ffWrite;
         if (hasExtension(outputPath.extension().string(), MovieReadNode::getExtensions()))
@@ -211,9 +267,26 @@ namespace toucan
                 outputPath,
                 OIIO::ImageSpec(imageSize.x, imageSize.y, 3),
                 timeRange,
-                videoCodec);
+                videoCodec,
+                includeAudio ? audioSampleRate : 0,
+                includeAudio ? audioChannelCount : 0,
+                audioCodec);
+        }
+
+        // Open the separate audio file.
+        std::shared_ptr<ffmpeg::AudioWrite> audioFileWrite;
+        if (_cmdLine.audioFile->hasValue() && includeAudio)
+        {
+            audioFileWrite = std::make_shared<ffmpeg::AudioWrite>(
+                std::filesystem::path(_cmdLine.audioFile->getValue()),
+                audioSampleRate,
+                audioChannelCount,
+                audioCodec);
         }
 
+        const int samplesPerFrame = static_cast<int>(
+            std::round(static_cast<double>(audioSampleRate) / timeRange.duration().rate()));
+
         // Render the timeline frames.
         if (_cmdLine.y4m->hasValue())
         {
@@ -261,6 +334,21 @@ namespace toucan
                     _writeY4mFrame(buf);
                 }
             }
+
+            // Render and write audio for this frame.
+            if (includeAudio)
+            {
+                const AudioBuffer audioBuf = _audioGraph->exec(time, samplesPerFrame);
+
+                if (ffWrite)
+                {
+                    ffWrite->writeAudio(audioBuf);
+                }
+                if (audioFileWrite)
+                {
+                    audioFileWrite->writeAudio(audioBuf);
+                }
+            }
         }
     }
 
@@ -484,4 +572,3 @@ namespace toucan
         }
     }
 }
-
diff --git a/bin/toucan-render/App.h b/bin/toucan-render/App.h
index c73a206..d45dbbe 100644
--- a/bin/toucan-render/App.h
+++ b/bin/toucan-render/App.h
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include <toucanRender/AudioGraph.h>
 #include <toucanRender/ImageEffectHost.h>
 #include <toucanRender/ImageGraph.h>
 #include <toucanRender/TimelineWrapper.h>
@@ -31,18 +32,18 @@ namespace toucan
 
     public:
         ~App();
-        
+
         static std::shared_ptr<App> create(
             const std::shared_ptr<ftk::Context>&,
             std::vector<std::string>&);
 
         void run() override;
-    
+
     private:
         void _writeRawFrame(const OIIO::ImageBuf&);
         void _writeY4mHeader();
         void _writeY4mFrame(const OIIO::ImageBuf&);
-        
+
         struct CmdLine
         {
             std::shared_ptr<ftk::CmdLineValueArg<std::string> > input;
@@ -57,11 +58,18 @@ namespace toucan
             std::shared_ptr<ftk::CmdLineValueOption<std::string> > raw;
             std::shared_ptr<ftk::CmdLineValueOption<std::string> > y4m;
             std::shared_ptr<ftk::CmdLineFlagOption> verbose;
+
+            std::shared_ptr<ftk::CmdLineValueOption<std::string> > audioCodec;
+            std::shared_ptr<ftk::CmdLineValueOption<int> > audioSampleRate;
+            std::shared_ptr<ftk::CmdLineValueOption<int> > audioChannelCount;
+            std::shared_ptr<ftk::CmdLineValueOption<std::string> > audioFile;
+            std::shared_ptr<ftk::CmdLineFlagOption> noAudio;
         };
         CmdLine _cmdLine;
 
         std::shared_ptr<TimelineWrapper> _timelineWrapper;
         std::shared_ptr<ImageGraph> _graph;
+        std::shared_ptr<AudioGraph> _audioGraph;
         std::shared_ptr<ImageEffectHost> _host;
 
         AVFrame* _avFrame = nullptr;
@@ -71,4 +79,3 @@ namespace toucan
         SwsContext* _swsContext = nullptr;
     };
 }
-
diff --git a/cmake/SuperBuild/BuildFFmpeg.cmake b/cmake/SuperBuild/BuildFFmpeg.cmake
index 0ab8904..a8a9572 100644
--- a/cmake/SuperBuild/BuildFFmpeg.cmake
+++ b/cmake/SuperBuild/BuildFFmpeg.cmake
@@ -100,6 +100,7 @@ set(FFmpeg_CONFIGURE_ARGS
 if(toucan_FFmpeg_MINIMAL)
     list(APPEND FFmpeg_CONFIGURE_ARGS
         --disable-decoders
+        --enable-decoder=aac
         --enable-decoder=apv
         --enable-decoder=av1
         --enable-decoder=flac
@@ -151,6 +152,7 @@ if(toucan_FFmpeg_MINIMAL)
         --enable-decoder=vp9
         --enable-decoder=yuv4
         --disable-encoders
+        --enable-encoder=aac
         --enable-encoder=flac
         --enable-encoder=mjpeg
         --enable-encoder=mpeg2video
@@ -265,6 +267,7 @@ if(toucan_FFmpeg_MINIMAL)
         --enable-muxer=wav
         --enable-muxer=yuv4mpegpipe
         --disable-parsers
+        --enable-parser=aac
         --enable-parser=apv
         --enable-parser=av1
         --enable-parser=flac
diff --git a/lib/toucanRender/AudioBuffer.h b/lib/toucanRender/AudioBuffer.h
new file mode 100644
index 0000000..9227425
--- /dev/null
+++ b/lib/toucanRender/AudioBuffer.h
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Contributors to the toucan project.
+
+#pragma once
+
+#include <cstddef>
+#include <vector>
+
+namespace toucan
+{
+    //! Float audio samples together with the format they were produced in.
+    struct AudioBuffer
+    {
+        std::vector<float> data;
+        int sampleRate = 0;
+        int channelCount = 0;
+        int sampleCount = 0;
+        bool isValid() const { return sampleRate > 0 && !data.empty(); }
+        std::size_t byteCount() const { return sizeof(float) * data.size(); }
+    };
+}
diff --git a/lib/toucanRender/AudioGraph.cpp b/lib/toucanRender/AudioGraph.cpp
new file mode 100644
index 0000000..5d30268
--- /dev/null
+++ b/lib/toucanRender/AudioGraph.cpp
@@ -0,0 +1,392 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Contributors to the toucan project.
+
+#include "AudioGraph.h"
+
+#include "TimelineAlgo.h"
+#include "Util.h"
+
+#include <ftk/Core/LogSystem.h>
+
+#include <opentimelineio/clip.h>
+#include <opentimelineio/externalReference.h>
+#include <opentimelineio/gap.h>
+#include <opentimelineio/linearTimeWarp.h>
+
+#include <algorithm>
+#include <cmath>
+
+namespace toucan
+{
+    namespace
+    {
+        const std::string logPrefix = "toucan::AudioGraph";
+    }
+
+    // Set up the graph and record whether the timeline has any audio: a clip
+    // on an audio track or, failing that, a video clip whose media has audio.
+    AudioGraph::AudioGraph(
+        const std::shared_ptr<ftk::Context>& context,
+        const std::filesystem::path& path,
+        const std::shared_ptr<TimelineWrapper>& timelineWrapper,
+        int sampleRate,
+        int channelCount) :
+        _context(context),
+        _path(path),
+        _timelineWrapper(timelineWrapper),
+        _timeRange(timelineWrapper->getTimeRange()),
+        _sampleRate(sampleRate),
+        _channelCount(channelCount)
+    {
+        _audioReadCache.setMax(20);
+
+        const auto& tracks = _timelineWrapper->getTimeline()->tracks()->children();
+        _hasAudio = std::any_of(
+            tracks.begin(),
+            tracks.end(),
+            [](const auto& child)
+            {
+                const auto track = OTIO_NS::dynamic_retainer_cast<OTIO_NS::Track>(child);
+                return track &&
+                    OTIO_NS::Track::Kind::audio == track->kind() &&
+                    !track->find_clips().empty();
+            });
+        if (_hasAudio) return;
+
+        // Otherwise probe video clips and stop at the first file with audio.
+        for (const auto& child : tracks)
+        {
+            const auto track = OTIO_NS::dynamic_retainer_cast<OTIO_NS::Track>(child);
+            if (!track || track->kind() != OTIO_NS::Track::Kind::video)
+            {
+                continue;
+            }
+            for (auto clip : track->find_clips())
+            {
+                auto externalRef = dynamic_cast<OTIO_NS::ExternalReference*>(clip->media_reference());
+                if (!externalRef)
+                {
+                    continue;
+                }
+                try
+                {
+                    const std::string mediaPath =
+                        _timelineWrapper->getMediaPath(externalRef->target_url());
+                    auto audioRead = std::make_shared<ffmpeg::AudioRead>(
+                        mediaPath, _sampleRate, _channelCount);
+                    if (audioRead->hasAudio())
+                    {
+                        _hasAudio = true;
+                        _audioReadCache.add(externalRef, audioRead);
+                        return;
+                    }
+                }
+                catch (const std::exception&)
+                {
+                    // Media that cannot be opened counts as having no audio.
+                }
+            }
+        }
+    }
+
+    // Nothing to release by hand: every member cleans up after itself.
+    AudioGraph::~AudioGraph() = default;
+
+    // Sample rate given to the constructor; exec() stamps it on every
+    // buffer it returns.
+    int AudioGraph::getSampleRate() const
+    { return _sampleRate; }
+
+    // Channel count given to the constructor; exec() stamps it on every
+    // buffer it returns.
+    int AudioGraph::getChannelCount() const
+    { return _channelCount; }
+
+    // Result of the scan done in the constructor: true if an audio track has
+    // clips, or a video clip's media was found to contain audio.
+    bool AudioGraph::hasAudio() const
+    { return _hasAudio; }
+
+    // Render and mix one block of audio for the given timeline time.
+    //
+    // Each audio or video track that contains clips is rendered, has its
+    // track effects applied, and is summed into the result, which is then
+    // clamped to [-1, 1]. A non-positive sampleCount gives an empty buffer.
+    AudioBuffer AudioGraph::exec(const OTIO_NS::RationalTime& time, int sampleCount)
+    {
+        AudioBuffer out;
+        out.sampleRate = _sampleRate;
+        out.channelCount = _channelCount;
+        // Negative counts would wrap to a huge size when passed to resize().
+        out.sampleCount = std::max(sampleCount, 0);
+        out.data.resize(static_cast<size_t>(out.sampleCount) * std::max(_channelCount, 0), 0.F);
+        if (out.data.empty())
+        {
+            return out;
+        }
+
+        auto stack = _timelineWrapper->getTimeline()->tracks();
+        OTIO_NS::RationalTime t = time - _timeRange.start_time();
+        t = _timeWarps(t, stack->available_range(), stack->effects());
+
+        for (const auto& child : stack->children())
+        {
+            auto track = OTIO_NS::dynamic_retainer_cast<OTIO_NS::Track>(child);
+            // Video tracks count too: their clips may reference media with audio.
+            if (!track ||
+                (track->kind() != OTIO_NS::Track::Kind::audio &&
+                 track->kind() != OTIO_NS::Track::Kind::video) ||
+                track->find_clips().empty())
+            {
+                continue;
+            }
+
+            const auto& trackEffects = track->effects();
+            OTIO_NS::RationalTime t2 = t;
+            if (!trackEffects.empty())
+            {
+                t2 = _timeWarps(t2, track->available_range(), trackEffects);
+            }
+
+            AudioBuffer trackBuf = _track(t2, out.sampleCount, track);
+
+            _applyEffects(trackBuf, trackEffects);
+
+            if (trackBuf.isValid())
+            {
+                const size_t count = std::min(out.data.size(), trackBuf.data.size());
+                for (size_t j = 0; j < count; ++j)
+                {
+                    out.data[j] += trackBuf.data[j];
+                }
+            }
+        }
+
+        for (float& sample : out.data)
+        {
+            sample = std::max(-1.F, std::min(1.F, sample));
+        }
+
+        return out;
+    }
+
+    // Render the audio of one track at the given track time.
+    // The first item whose trimmed range contains the time supplies the
+    // samples; without one the result is a zero-filled buffer. A transition
+    // next to that item that is active at this time cross-fades it with the
+    // item on the transition's far side, using one mix factor per call.
+    AudioBuffer AudioGraph::_track(
+        const OTIO_NS::RationalTime& time,
+        int sampleCount,
+        const OTIO_NS::SerializableObject::Retainer<OTIO_NS::Track>& track)
+    {
+        using ComposableRetainer = OTIO_NS::SerializableObject::Retainer<OTIO_NS::Composable>;
+
+        AudioBuffer out;
+        out.sampleRate = _sampleRate;
+        out.channelCount = _channelCount;
+        out.sampleCount = sampleCount;
+        out.data.resize(sampleCount * _channelCount, 0.F);
+
+        // Find the item that is active at the requested time.
+        const auto& children = track->children();
+        const size_t childCount = children.size();
+        OTIO_NS::SerializableObject::Retainer<OTIO_NS::Item> active;
+        size_t index = 0;
+        for (; index < childCount; ++index)
+        {
+            auto candidate = OTIO_NS::dynamic_retainer_cast<OTIO_NS::Item>(children[index]);
+            if (!candidate)
+            {
+                continue;
+            }
+            const auto range = candidate->trimmed_range_in_parent();
+            if (range.has_value() && range.value().contains(time))
+            {
+                active = candidate;
+                break;
+            }
+        }
+        if (!active)
+        {
+            return out;
+        }
+        out = _item(track->transformed_time(time, active), sampleCount, active);
+
+        // Neighbours of the active item; left empty at the ends of the track.
+        ComposableRetainer prev;
+        ComposableRetainer prev2;
+        ComposableRetainer next;
+        ComposableRetainer next2;
+        if (index >= 1)
+        {
+            prev = children[index - 1];
+        }
+        if (index >= 2)
+        {
+            prev2 = children[index - 2];
+        }
+        if (index + 1 < childCount)
+        {
+            next = children[index + 1];
+        }
+        if (index + 2 < childCount)
+        {
+            next2 = children[index + 2];
+        }
+
+        // Blend "out" with the item beyond an adjacent transition. Before the
+        // active item the far item fades out; after it the far item fades in.
+        const auto crossfade = [&](
+            const ComposableRetainer& neighbor,
+            const ComposableRetainer& beyond,
+            bool preceding)
+        {
+            auto transition = OTIO_NS::dynamic_retainer_cast<OTIO_NS::Transition>(neighbor);
+            if (!transition)
+            {
+                return;
+            }
+            const auto range = transition->trimmed_range_in_parent();
+            if (!range.has_value() || !range.value().contains(time))
+            {
+                return;
+            }
+            auto other = OTIO_NS::dynamic_retainer_cast<OTIO_NS::Item>(beyond);
+            if (!other)
+            {
+                return;
+            }
+            const double value =
+                (time - range.value().start_time()).value() /
+                range.value().duration().value();
+            const AudioBuffer otherBuf =
+                _item(track->transformed_time(time, other), sampleCount, other);
+            if (!otherBuf.isValid() || !out.isValid())
+            {
+                return;
+            }
+            const float otherGain = static_cast<float>(preceding ? 1.0 - value : value);
+            const float outGain = static_cast<float>(preceding ? value : 1.0 - value);
+            for (size_t j = 0; j < out.data.size() && j < otherBuf.data.size(); ++j)
+            {
+                out.data[j] = preceding ?
+                    otherBuf.data[j] * otherGain + out.data[j] * outGain :
+                    out.data[j] * outGain + otherBuf.data[j] * otherGain;
+            }
+        };
+        crossfade(prev, prev2, true);
+        crossfade(next, next2, false);
+        return out;
+    }
+
+    // Render the audio of one item at the given item time. Clips with an
+    // external media reference are read through a cached ffmpeg::AudioRead;
+    // gaps, other references and unreadable media stay zero-filled. The
+    // item's non-time-warp effects are applied to the result.
+    AudioBuffer AudioGraph::_item(
+        const OTIO_NS::RationalTime& time,
+        int sampleCount,
+        const OTIO_NS::SerializableObject::Retainer<OTIO_NS::Item>& item)
+    {
+        AudioBuffer out;
+        out.sampleRate = _sampleRate;
+        out.channelCount = _channelCount;
+        out.sampleCount = sampleCount;
+        out.data.resize(sampleCount * _channelCount, 0.F);
+
+        const auto& effects = item->effects();
+        OTIO_NS::RationalTime t = _timeWarps(time, item->available_range(), effects);
+
+        if (auto clip = OTIO_NS::dynamic_retainer_cast<OTIO_NS::Clip>(item))
+        {
+            auto mediaRef = clip->media_reference();
+            if (auto externalRef = dynamic_cast<OTIO_NS::ExternalReference*>(mediaRef))
+            {
+                std::shared_ptr<ffmpeg::AudioRead> audioRead;
+                if (!_audioReadCache.get(externalRef, audioRead))
+                {
+                    try
+                    {
+                        const std::string mediaPath =
+                            _timelineWrapper->getMediaPath(externalRef->target_url());
+                        audioRead = std::make_shared<ffmpeg::AudioRead>(
+                            mediaPath, _sampleRate, _channelCount);
+                    }
+                    catch (const std::exception& e)
+                    {
+                        // The context is only held weakly and may be gone.
+                        if (auto context = _context.lock())
+                        {
+                            context->getSystem<ftk::LogSystem>()->print(
+                                logPrefix, e.what(), ftk::LogType::Error);
+                        }
+                    }
+                    // Failures are cached as an empty pointer so that a broken
+                    // file is not reopened and reported again for every block.
+                    _audioReadCache.add(externalRef, audioRead);
+                }
+                if (audioRead && audioRead->hasAudio())
+                {
+                    if (clip->available_range().start_time() !=
+                        audioRead->getTimeRange().start_time())
+                    {
+                        t -= clip->available_range().start_time();
+                    }
+                    out = audioRead->getAudio(t, sampleCount);
+                }
+            }
+        }
+        _applyEffects(out, effects);
+        return out;
+    }
+
+    // Apply each LinearTimeWarp in turn: scale the offset from the range start
+    // (rescaled to the rate of "time", as the range may use another rate).
+    OTIO_NS::RationalTime AudioGraph::_timeWarps(
+        const OTIO_NS::RationalTime& time,
+        const OTIO_NS::TimeRange& timeRange,
+        const std::vector<OTIO_NS::SerializableObject::Retainer<OTIO_NS::Effect> >& effects)
+    {
+        OTIO_NS::RationalTime out = time;
+        for (const auto& effect : effects)
+        {
+            if (auto warp = dynamic_cast<OTIO_NS::LinearTimeWarp*>(effect.value))
+            {
+                const auto offset = (out - timeRange.start_time()).rescaled_to(time.rate());
+                out = OTIO_NS::RationalTime(offset.value() * warp->time_scalar(), time.rate()).round();
+            }
+        }
+        return out;
+    }
+
+    // Scale the buffer by each effect's numeric "volume" metadata (a double, or
+    // an integer such as a bare 1); LinearTimeWarp effects are skipped here.
+    void AudioGraph::_applyEffects(
+        AudioBuffer& buffer,
+        const std::vector<OTIO_NS::SerializableObject::Retainer<OTIO_NS::Effect> >& effects)
+    {
+        for (const auto& effect : effects)
+        {
+            if (dynamic_cast<OTIO_NS::LinearTimeWarp*>(effect.value))
+            {
+                continue;
+            }
+            const auto& metaData = effect->metadata();
+            const auto volumeIt = metaData.find("volume");
+            if (volumeIt == metaData.end()) continue;
+            const double* asDouble = std::any_cast<double>(&volumeIt->second);
+            const long long* asLongLong = std::any_cast<long long>(&volumeIt->second);
+            const long* asLong = std::any_cast<long>(&volumeIt->second);
+            if (!asDouble && !asLongLong && !asLong)
+            {
+                continue;
+            }
+            const float volume = static_cast<float>(asDouble ? *asDouble : (asLongLong ? *asLongLong : *asLong));
+            for (float& sample : buffer.data)
+            {
+                sample *= volume;
+            }
+        }
+    }
+}
diff --git a/lib/toucanRender/AudioGraph.h b/lib/toucanRender/AudioGraph.h
new file mode 100644
index 0000000..ade069e
--- /dev/null
+++ b/lib/toucanRender/AudioGraph.h
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Contributors to the toucan project.
+
+#pragma once
+
+#include <toucanRender/AudioBuffer.h>
+#include <toucanRender/FFmpegAudioRead.h>
+#include <toucanRender/TimelineWrapper.h>
+
+#include <ftk/Core/Context.h>
+#include <ftk/Core/LRUCache.h>
+
+#include <opentimelineio/track.h>
+#include <opentimelineio/transition.h>
+
+#include <filesystem>
+#include <memory>
+
+namespace toucan
+{
+    //! Renders the audio of a timeline: tracks are read, faded and mixed.
+    class AudioGraph : public std::enable_shared_from_this<AudioGraph>
+    {
+    public:
+        AudioGraph(
+            const std::shared_ptr<ftk::Context>& context,
+            const std::filesystem::path& path,
+            const std::shared_ptr<TimelineWrapper>& timelineWrapper,
+            int sampleRate,
+            int channelCount);
+        ~AudioGraph();
+        //! Get the output format and whether the timeline has audio.
+        int getSampleRate() const;
+        int getChannelCount() const;
+        bool hasAudio() const;
+        //! Render sampleCount samples per channel of mixed audio for a time.
+        AudioBuffer exec(const OTIO_NS::RationalTime& time, int sampleCount);
+
+    private:
+        //! Render a single track, cross-fading across transitions.
+        AudioBuffer _track(
+            const OTIO_NS::RationalTime& time,
+            int sampleCount,
+            const OTIO_NS::SerializableObject::Retainer<OTIO_NS::Track>& track);
+        //! Render a single item (clip or gap).
+        AudioBuffer _item(
+            const OTIO_NS::RationalTime& time,
+            int sampleCount,
+            const OTIO_NS::SerializableObject::Retainer<OTIO_NS::Item>& item);
+        //! Apply the linear time warps among the effects to a time.
+        OTIO_NS::RationalTime _timeWarps(
+            const OTIO_NS::RationalTime& time,
+            const OTIO_NS::TimeRange& timeRange,
+            const std::vector<OTIO_NS::SerializableObject::Retainer<OTIO_NS::Effect> >& effects);
+        //! Apply the remaining effects (volume) to a buffer in place.
+        void _applyEffects(
+            AudioBuffer& buffer,
+            const std::vector<OTIO_NS::SerializableObject::Retainer<OTIO_NS::Effect> >& effects);
+
+        std::weak_ptr<ftk::Context> _context;
+        std::filesystem::path _path;
+        std::shared_ptr<TimelineWrapper> _timelineWrapper;
+        OTIO_NS::TimeRange _timeRange;
+        int _sampleRate = 48000;
+        int _channelCount = 2;
+        bool _hasAudio = false;
+        ftk::LRUCache<const OTIO_NS::MediaReference*, std::shared_ptr<ffmpeg::AudioRead> > _audioReadCache;
+    };
+}
diff --git a/lib/toucanRender/CMakeLists.txt b/lib/toucanRender/CMakeLists.txt
index 02aa75c..845efc4 100644
--- a/lib/toucanRender/CMakeLists.txt
+++ b/lib/toucanRender/CMakeLists.txt
@@ -1,6 +1,10 @@
 set(HEADERS
+    AudioBuffer.h
+    AudioGraph.h
     Comp.h
     FFmpeg.h
+    FFmpegAudioRead.h
+    FFmpegAudioWrite.h
     FFmpegRead.h
     FFmpegWrite.h
     ImageEffect.h
@@ -17,8 +21,11 @@ set(HEADERS
     Util.h)
 set(HEADERS_PRIVATE)
 set(SOURCE
+    AudioGraph.cpp
     Comp.cpp
     FFmpeg.cpp
+    FFmpegAudioRead.cpp
+    FFmpegAudioWrite.cpp
     FFmpegRead.cpp
     FFmpegWrite.cpp
     ImageEffect.cpp
@@ -44,6 +51,10 @@ else()
 endif()
 
 add_library(toucanRender ${HEADERS} ${HEADERS_PRIVATE} ${SOURCE})
+find_library(SWRESAMPLE_LIBRARY swresample PATHS ${CMAKE_INSTALL_PREFIX}/lib NO_DEFAULT_PATH)
+if(NOT SWRESAMPLE_LIBRARY)
+    find_library(SWRESAMPLE_LIBRARY swresample)
+endif()
 set(LIBS_PUBLIC
     toucanResource
     ftk::ftkCore
@@ -52,6 +63,9 @@ set(LIBS_PUBLIC
     lunasvg::lunasvg
     OpenImageIO::OpenImageIO
     MINIZIP::minizip)
+if(SWRESAMPLE_LIBRARY)
+    list(APPEND LIBS_PUBLIC ${SWRESAMPLE_LIBRARY})
+endif()
 if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9)
     list(APPEND LIBS_PUBLIC stdc++fs)
 endif()
diff --git a/lib/toucanRender/FFmpeg.cpp b/lib/toucanRender/FFmpeg.cpp
index 927a094..b42dc88 100644
--- a/lib/toucanRender/FFmpeg.cpp
+++ b/lib/toucanRender/FFmpeg.cpp
@@ -71,6 +71,49 @@ namespace toucan
                 AV_PROFILE_UNKNOWN,
                 AV_PROFILE_AV1_MAIN
             };
+
+            // Collect (codec id, encoder name) for every audio encoder in FFmpeg.
+            std::vector<std::pair<int, std::string> > _getAudioCodecs()
+            {
+                std::vector<std::pair<int, std::string> > encoders;
+                void* iterState = nullptr;
+                for (const AVCodec* codec = nullptr; (codec = av_codec_iterate(&iterState)) != nullptr; )
+                {
+                    const bool isAudio = AVMEDIA_TYPE_AUDIO == avcodec_get_type(codec->id);
+                    if (isAudio && av_codec_is_encoder(codec))
+                    {
+                        encoders.emplace_back(codec->id, codec->name);
+                    }
+                }
+                return encoders;
+            }
+
+            // The three tables below are indexed by AudioCodec, so their
+            // entries must stay in the order of that enum. Codec names:
+            const std::vector<std::string> audioCodecStrings =
+            {
+                "pcm_s16le", "pcm_s24le", "pcm_f32le", "flac", "aac"
+            };
+
+            // FFmpeg codec identifiers.
+            const std::vector<AVCodecID> audioCodecIds =
+            {
+                AV_CODEC_ID_PCM_S16LE,
+                AV_CODEC_ID_PCM_S24LE,
+                AV_CODEC_ID_PCM_F32LE,
+                AV_CODEC_ID_FLAC,
+                AV_CODEC_ID_AAC
+            };
+
+            // Sample format associated with each codec.
+            const std::vector<AVSampleFormat> audioSampleFormats =
+            {
+                AV_SAMPLE_FMT_S16,
+                AV_SAMPLE_FMT_S32,
+                AV_SAMPLE_FMT_FLT,
+                AV_SAMPLE_FMT_S32,
+                AV_SAMPLE_FMT_FLTP
+            };
         }
 
         std::vector<VideoCodec> getVideoCodecs()
@@ -122,6 +165,55 @@ namespace toucan
             return videoCodecProfiles[static_cast<size_t>(value)];
         }
 
+        // Supported codecs in FFmpeg's order; encoders sharing a codec id count once.
+        std::vector<AudioCodec> getAudioCodecs()
+        {
+            std::vector<AudioCodec> out;
+            for (const auto& encoder : _getAudioCodecs())
+            {
+                const auto id = std::find(audioCodecIds.begin(), audioCodecIds.end(), encoder.first);
+                const auto codec = static_cast<AudioCodec>(id - audioCodecIds.begin());
+                if (id != audioCodecIds.end() && std::find(out.begin(), out.end(), codec) == out.end())
+                {
+                    out.push_back(codec);
+                }
+            }
+            return out;
+        }
+
+        std::vector<std::string> getAudioCodecStrings()
+        {
+            // One name per entry of getAudioCodecs(), in the same order.
+            const std::vector<AudioCodec> codecs = getAudioCodecs();
+            std::vector<std::string> names(codecs.size());
+            std::transform(codecs.begin(), codecs.end(), names.begin(),
+                [](AudioCodec codec) { return toString(codec); });
+            return names;
+        }
+
+        // Look the name up in the codec table. The enum value is the index, so
+        // "value" has to be an actual codec rather than AudioCodec::Count.
+        std::string toString(AudioCodec value)
+        { return audioCodecStrings[static_cast<size_t>(value)]; }
+
+        void fromString(const std::string& s, AudioCodec& value)
+        {
+            // Names that are not in the table fall back to AudioCodec::First.
+            const auto match = std::find(audioCodecStrings.begin(), audioCodecStrings.end(), s);
+            const bool known = match != audioCodecStrings.end();
+            value = known ? static_cast<AudioCodec>(match - audioCodecStrings.begin()) : AudioCodec::First;
+        }
+
+        // Look the FFmpeg id up in the codec table. The enum value is the index,
+        // so "value" has to be an actual codec rather than AudioCodec::Count.
+        AVCodecID getAudioCodecId(AudioCodec value)
+        { return audioCodecIds[static_cast<size_t>(value)]; }
+
+        // Look the sample format up in its table. The enum value is the index,
+        // so "value" has to be an actual codec rather than AudioCodec::Count.
+        AVSampleFormat getAudioSampleFormat(AudioCodec value)
+        { return audioSampleFormats[static_cast<size_t>(value)]; }
+
         std::string getErrorLabel(int r)
         {
             char buf[4096];
diff --git a/lib/toucanRender/FFmpeg.h b/lib/toucanRender/FFmpeg.h
index a14413c..95ec224 100644
--- a/lib/toucanRender/FFmpeg.h
+++ b/lib/toucanRender/FFmpeg.h
@@ -11,6 +11,7 @@ extern "C"
 #include <libavcodec/avcodec.h>
 #include <libavutil/log.h>
 #include <libavutil/rational.h>
+#include <libavutil/samplefmt.h>
 }
 
 namespace toucan
@@ -52,6 +53,37 @@ namespace toucan
         //! Get a video codec profile.
         int getVideoCodecProfile(VideoCodec);
 
+        //! Audio codecs.
+        //!
+        //! The enumerator values are cast to size_t and used as indices into
+        //! the per-codec tables (labels, codec IDs, sample formats), so the
+        //! order here has to match the order of those tables.
+        enum class AudioCodec
+        {
+            PCM_S16LE,
+            PCM_S24LE,
+            PCM_F32LE,
+            FLAC,
+            AAC,
+
+            //! Number of codecs listed above.
+            Count,
+            //! Alias of the first codec; used as the fallback by fromString().
+            First = PCM_S16LE
+        };
+
+        //! Get a list of audio codecs.
+        std::vector<AudioCodec> getAudioCodecs();
+
+        //! Get a list of audio codec strings. The list holds the string
+        //! conversion of each codec returned by getAudioCodecs(), in the
+        //! same order.
+        std::vector<std::string> getAudioCodecStrings();
+
+        //! Convert an audio codec to a string.
+        std::string toString(AudioCodec);
+
+        //! Convert a string to an audio codec. A string that does not match
+        //! any codec sets the value to AudioCodec::First.
+        void fromString(const std::string&, AudioCodec&);
+
+        //! Get an audio codec ID.
+        AVCodecID getAudioCodecId(AudioCodec);
+
+        //! Get an audio sample format.
+        AVSampleFormat getAudioSampleFormat(AudioCodec);
+
         //! FFmpeg log callback.
         void log(void*, int level, const char* fmt, va_list vl);
 
diff --git a/lib/toucanRender/FFmpegAudioRead.cpp b/lib/toucanRender/FFmpegAudioRead.cpp
new file mode 100644
index 0000000..28f27e5
--- /dev/null
+++ b/lib/toucanRender/FFmpegAudioRead.cpp
@@ -0,0 +1,493 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Contributors to the toucan project.
+
+#include "FFmpegAudioRead.h"
+
+#include <iostream>
+#include <sstream>
+#include <cmath>
+
+extern "C"
+{
+#include <libavutil/opt.h>
+#include <libavutil/channel_layout.h>
+}
+
+namespace toucan
+{
+    namespace ffmpeg
+    {
+        namespace
+        {
+            // Size in bytes of the buffer handed to avio_alloc_context().
+            const size_t avIOContextBufferSize = 4096;
+
+            // Owning wrapper for an AVPacket: the packet is allocated on
+            // construction and freed on destruction. The pointer is null if
+            // the allocation failed.
+            class Packet
+            {
+            public:
+                Packet()
+                {
+                    p = av_packet_alloc();
+                }
+
+                ~Packet()
+                {
+                    av_packet_free(&p);
+                }
+
+                // A copy would leave two wrappers freeing the same packet,
+                // so copying is disabled.
+                Packet(const Packet&) = delete;
+                Packet& operator=(const Packet&) = delete;
+
+                AVPacket* p = nullptr;
+            };
+        }
+
+        // Open the audio of a file, or of the in-memory data when the memory
+        // reference is valid, and set up decoding plus conversion to float
+        // samples at the requested output rate and channel count.
+        //
+        // Throws std::runtime_error when FFmpeg cannot open or set up the
+        // media. A source without any audio stream is not an error; the
+        // stream index then stays at -1 and hasAudio() returns false.
+        //
+        // NOTE(review): resources allocated before a throw are not released
+        // here, because the destructor does not run for a partially
+        // constructed object.
+        AudioRead::AudioRead(
+            const std::filesystem::path& path,
+            int outputSampleRate,
+            int outputChannelCount,
+            const MemoryReference& memoryReference) :
+            _path(path),
+            _memoryReference(memoryReference),
+            _outputSampleRate(outputSampleRate),
+            _outputChannelCount(outputChannelCount)
+        {
+            av_log_set_level(AV_LOG_QUIET);
+
+            // In-memory sources are read through a custom I/O context that
+            // pulls bytes from the memory reference.
+            if (memoryReference.isValid())
+            {
+                _avFormatContext = avformat_alloc_context();
+                if (!_avFormatContext)
+                {
+                    throw std::runtime_error("Cannot allocate format context");
+                }
+
+                _avIOBufferData = AVIOBufferData(
+                    reinterpret_cast<const uint8_t*>(memoryReference.getData()),
+                    memoryReference.getSize());
+                _avIOContextBuffer = static_cast<uint8_t*>(av_malloc(avIOContextBufferSize));
+                if (!_avIOContextBuffer)
+                {
+                    throw std::runtime_error("Cannot allocate I/O buffer");
+                }
+                _avIOContext = avio_alloc_context(
+                    _avIOContextBuffer,
+                    avIOContextBufferSize,
+                    0,
+                    &_avIOBufferData,
+                    &_avIOBufferRead,
+                    nullptr,
+                    &_avIOBufferSeek);
+                if (!_avIOContext)
+                {
+                    throw std::runtime_error("Cannot allocate I/O context");
+                }
+
+                _avFormatContext->pb = _avIOContext;
+            }
+
+            // The file name is only passed when no format context was
+            // prepared for in-memory reading.
+            const std::string fileName = path.string();
+            int r = avformat_open_input(
+                &_avFormatContext,
+                !_avFormatContext ? fileName.c_str() : nullptr,
+                nullptr,
+                nullptr);
+            if (r < 0 || !_avFormatContext)
+            {
+                throw std::runtime_error("Cannot open file");
+            }
+
+            r = avformat_find_stream_info(_avFormatContext, nullptr);
+            if (r < 0)
+            {
+                throw std::runtime_error("Cannot find stream info");
+            }
+
+            // Prefer the audio stream flagged as default. The disposition is
+            // a bit field, so the flag is tested rather than compared for
+            // equality; otherwise a default stream carrying any additional
+            // flag would be overlooked.
+            for (unsigned int i = 0; i < _avFormatContext->nb_streams; ++i)
+            {
+                const AVStream* stream = _avFormatContext->streams[i];
+                if (AVMEDIA_TYPE_AUDIO == stream->codecpar->codec_type &&
+                    (stream->disposition & AV_DISPOSITION_DEFAULT) != 0)
+                {
+                    _avStream = static_cast<int>(i);
+                    break;
+                }
+            }
+            // Otherwise fall back to the first audio stream.
+            if (-1 == _avStream)
+            {
+                for (unsigned int i = 0; i < _avFormatContext->nb_streams; ++i)
+                {
+                    if (AVMEDIA_TYPE_AUDIO == _avFormatContext->streams[i]->codecpar->codec_type)
+                    {
+                        _avStream = static_cast<int>(i);
+                        break;
+                    }
+                }
+            }
+
+            if (_avStream != -1)
+            {
+                auto avAudioStream = _avFormatContext->streams[_avStream];
+                auto avAudioCodecParameters = avAudioStream->codecpar;
+                auto avAudioCodec = avcodec_find_decoder(avAudioCodecParameters->codec_id);
+                if (!avAudioCodec)
+                {
+                    throw std::runtime_error("No audio codec found");
+                }
+                _avCodecParameters = avcodec_parameters_alloc();
+                if (!_avCodecParameters)
+                {
+                    throw std::runtime_error("Cannot allocate parameters");
+                }
+                r = avcodec_parameters_copy(_avCodecParameters, avAudioCodecParameters);
+                if (r < 0)
+                {
+                    throw std::runtime_error("Cannot copy parameters");
+                }
+                _avCodecContext = avcodec_alloc_context3(avAudioCodec);
+                if (!_avCodecContext)
+                {
+                    throw std::runtime_error("Cannot allocate context");
+                }
+                r = avcodec_parameters_to_context(_avCodecContext, _avCodecParameters);
+                if (r < 0)
+                {
+                    throw std::runtime_error("Cannot apply parameters");
+                }
+                _avCodecContext->thread_count = 0;
+                _avCodecContext->thread_type = FF_THREAD_FRAME;
+                r = avcodec_open2(_avCodecContext, avAudioCodec, 0);
+                if (r < 0)
+                {
+                    throw std::runtime_error("Cannot open audio stream");
+                }
+
+                _avFrame = av_frame_alloc();
+                if (!_avFrame)
+                {
+                    throw std::runtime_error("Cannot allocate frame");
+                }
+
+                // Zero the layout first so that no field is left
+                // indeterminate for channel counts without a default layout.
+                AVChannelLayout outLayout = {};
+                av_channel_layout_default(&outLayout, _outputChannelCount);
+
+                // The resampler converts from the stream's layout, format and
+                // rate to float samples at the requested output settings.
+                r = swr_alloc_set_opts2(
+                    &_swrContext,
+                    &outLayout,
+                    AV_SAMPLE_FMT_FLT,
+                    _outputSampleRate,
+                    &_avCodecParameters->ch_layout,
+                    static_cast<AVSampleFormat>(_avCodecParameters->format),
+                    _avCodecParameters->sample_rate,
+                    0,
+                    nullptr);
+                if (r < 0 || !_swrContext)
+                {
+                    throw std::runtime_error("Cannot allocate resampler context");
+                }
+                r = swr_init(_swrContext);
+                if (r < 0)
+                {
+                    throw std::runtime_error("Cannot initialize resampler");
+                }
+
+                // Take the duration from the stream when it has one,
+                // otherwise from the container; zero when neither is known.
+                double duration = 0.0;
+                if (avAudioStream->duration != AV_NOPTS_VALUE)
+                {
+                    duration = av_q2d(avAudioStream->time_base) * avAudioStream->duration;
+                }
+                else if (_avFormatContext->duration != AV_NOPTS_VALUE)
+                {
+                    duration = static_cast<double>(_avFormatContext->duration) / AV_TIME_BASE;
+                }
+
+                // The time range starts at zero and is expressed in samples
+                // at the stream's own sample rate, not the output rate.
+                const double rate = _avCodecParameters->sample_rate;
+                const int64_t totalSamples = static_cast<int64_t>(std::round(duration * rate));
+                _timeRange = OTIO_NS::TimeRange(
+                    OTIO_NS::RationalTime(0.0, rate),
+                    OTIO_NS::RationalTime(totalSamples, rate));
+                _currentTime = OTIO_NS::RationalTime(0.0, rate);
+            }
+        }
+
+        // Release the resampler, frame, decoder, demuxer and custom I/O
+        // objects created by the constructor.
+        AudioRead::~AudioRead()
+        {
+            if (_swrContext)
+            {
+                swr_free(&_swrContext);
+            }
+            if (_avFrame)
+            {
+                av_frame_free(&_avFrame);
+            }
+            if (_avCodecContext)
+            {
+                avcodec_free_context(&_avCodecContext);
+            }
+            if (_avCodecParameters)
+            {
+                avcodec_parameters_free(&_avCodecParameters);
+            }
+            // Close the demuxer before releasing the custom I/O context it
+            // reads through.
+            if (_avFormatContext)
+            {
+                avformat_close_input(&_avFormatContext);
+            }
+            if (_avIOContext)
+            {
+                // avio_context_free() does not release the I/O buffer, and
+                // libavformat may have replaced the buffer that was passed to
+                // avio_alloc_context(), so free the one currently attached.
+                av_freep(&_avIOContext->buffer);
+                _avIOContextBuffer = nullptr;
+                avio_context_free(&_avIOContext);
+            }
+        }
+
+        bool AudioRead::hasAudio() const
+        {
+            // The stream index stays at -1 when no audio stream was selected.
+            const bool streamSelected = -1 != _avStream;
+            return streamSelected;
+        }
+
+        // Get the output sample rate passed to the constructor, which is not
+        // necessarily the sample rate of the source stream.
+        int AudioRead::getSampleRate() const
+        {
+            return _outputSampleRate;
+        }
+
+        // Get the output channel count passed to the constructor.
+        int AudioRead::getChannelCount() const
+        {
+            return _outputChannelCount;
+        }
+
+        // Get the time range computed by the constructor; it is left default
+        // constructed when no audio stream was found.
+        const OTIO_NS::TimeRange& AudioRead::getTimeRange() const
+        {
+            return _timeRange;
+        }
+
+        // Get sampleCount samples per channel starting at the given time.
+        // The buffer is filled with silence when there is no audio stream and
+        // padded with zeros when decoding delivers fewer samples than asked.
+        AudioBuffer AudioRead::getAudio(
+            const OTIO_NS::RationalTime& time,
+            int sampleCount)
+        {
+            AudioBuffer out;
+            out.sampleRate = _outputSampleRate;
+            out.channelCount = _outputChannelCount;
+            out.sampleCount = sampleCount;
+
+            if (-1 == _avStream)
+            {
+                out.data.resize(sampleCount * _outputChannelCount, 0.F);
+                return out;
+            }
+
+            const double rate = _timeRange.duration().rate();
+            const int floatsWanted = sampleCount * _outputChannelCount;
+
+            // Seek unless this request starts exactly where the previous one
+            // stopped.
+            const OTIO_NS::RationalTime requested = time.rescaled_to(rate);
+            if (requested != _currentTime)
+            {
+                _seek(requested);
+            }
+
+            // Moves the read position forward by the samples handed out,
+            // converted from the output rate to the rate of the time range.
+            const auto advance = [this, rate, sampleCount]()
+            {
+                _currentTime = OTIO_NS::RationalTime(
+                    _currentTime.value() +
+                    static_cast<double>(sampleCount) *
+                    rate / _outputSampleRate,
+                    rate);
+            };
+
+            std::vector<float> samples;
+            samples.reserve(floatsWanted);
+
+            // Samples decoded beyond the previous request are used first.
+            if (!_residual.empty())
+            {
+                const int residualSamples =
+                    static_cast<int>(_residual.size()) / _outputChannelCount;
+                if (residualSamples < sampleCount)
+                {
+                    samples.insert(samples.end(), _residual.begin(), _residual.end());
+                    _residual.clear();
+                }
+                else
+                {
+                    // The leftovers alone satisfy the request.
+                    const auto residualEnd = _residual.begin() + floatsWanted;
+                    samples.insert(samples.end(), _residual.begin(), residualEnd);
+                    _residual.erase(_residual.begin(), residualEnd);
+                    out.data = std::move(samples);
+                    advance();
+                    return out;
+                }
+            }
+
+            const int buffered =
+                static_cast<int>(samples.size()) / _outputChannelCount;
+            _decode(samples, sampleCount - buffered);
+
+            // Keep any surplus for the next call, or pad a shortfall with
+            // zeros.
+            const int decoded = static_cast<int>(samples.size()) / _outputChannelCount;
+            if (decoded > sampleCount)
+            {
+                _residual.assign(samples.begin() + floatsWanted, samples.end());
+                samples.resize(floatsWanted);
+            }
+            else if (decoded < sampleCount)
+            {
+                samples.resize(floatsWanted, 0.F);
+            }
+
+            out.data = std::move(samples);
+            advance();
+            return out;
+        }
+
+        // Reposition reading at the given time, discarding decoder state,
+        // resampler state and leftover samples.
+        void AudioRead::_seek(const OTIO_NS::RationalTime& time)
+        {
+            // The end-of-file flag is cleared with or without an audio
+            // stream.
+            _eof = false;
+            if (-1 == _avStream)
+            {
+                return;
+            }
+
+            avcodec_flush_buffers(_avCodecContext);
+            swr_close(_swrContext);
+            swr_init(_swrContext);
+
+            // Express the offset from the start of the time range as a sample
+            // index at the stream's sample rate, then convert that to the
+            // stream time base.
+            const int sourceRate = _avCodecParameters->sample_rate;
+            const double offsetSeconds =
+                time.to_seconds() - _timeRange.start_time().to_seconds();
+            const int64_t sampleIndex = static_cast<int64_t>(
+                offsetSeconds * sourceRate);
+            const AVRational sampleTimeBase = { 1, sourceRate };
+            const int64_t target = av_rescale_q(
+                sampleIndex,
+                sampleTimeBase,
+                _avFormatContext->streams[_avStream]->time_base);
+            av_seek_frame(
+                _avFormatContext,
+                _avStream,
+                target,
+                AVSEEK_FLAG_BACKWARD);
+
+            _currentTime = time;
+            _residual.clear();
+        }
+
+        // Read, decode and resample audio, appending the converted float
+        // samples to output until at least sampleCount more samples per
+        // channel have been added or reading/decoding stops. More samples
+        // than requested may be appended; fewer are appended when the stream
+        // ends or an error occurs. Does nothing without an audio stream.
+        void AudioRead::_decode(std::vector<float>& output, int sampleCount)
+        {
+            if (_avStream == -1) return;
+
+            // Target size of output, counted in floats (samples * channels).
+            const int initialSize = static_cast<int>(output.size());
+            const int totalNeeded = initialSize + sampleCount * _outputChannelCount;
+
+            Packet packet;
+            // Holds the most recent FFmpeg return code; the value 1 is used
+            // locally to signal that enough samples have been collected.
+            int decoding = 0;
+            while (0 == decoding && static_cast<int>(output.size()) < totalNeeded)
+            {
+                if (!_eof)
+                {
+                    decoding = av_read_frame(_avFormatContext, packet.p);
+                    if (AVERROR_EOF == decoding)
+                    {
+                        // Remember the end of the file so that the decoder is
+                        // drained below instead of reading further packets.
+                        _eof = true;
+                        decoding = 0;
+                    }
+                    else if (decoding < 0)
+                    {
+                        break;
+                    }
+                }
+                // Only packets of the selected stream are decoded; at the end
+                // of the file the decoder is fed regardless of the packet.
+                if ((_eof && _avStream != -1) || (_avStream == packet.p->stream_index))
+                {
+                    // A null packet puts the decoder into draining mode.
+                    decoding = avcodec_send_packet(
+                        _avCodecContext,
+                        _eof ? nullptr : packet.p);
+                    if (AVERROR_EOF == decoding)
+                    {
+                        decoding = 0;
+                    }
+                    else if (decoding < 0)
+                    {
+                        break;
+                    }
+
+                    // Collect every frame the decoder has ready.
+                    while (0 == decoding)
+                    {
+                        decoding = avcodec_receive_frame(_avCodecContext, _avFrame);
+                        if (decoding < 0)
+                        {
+                            break;
+                        }
+
+                        // Upper bound of samples the resampler can produce
+                        // for this frame; used to size the scratch buffer.
+                        const int maxOutputSamples = swr_get_out_samples(
+                            _swrContext, _avFrame->nb_samples);
+                        if (maxOutputSamples <= 0) continue;
+
+                        std::vector<float> converted(maxOutputSamples * _outputChannelCount);
+                        uint8_t* outBuf = reinterpret_cast<uint8_t*>(converted.data());
+
+                        const int convertedSamples = swr_convert(
+                            _swrContext,
+                            &outBuf,
+                            maxOutputSamples,
+                            const_cast<const uint8_t**>(_avFrame->extended_data),
+                            _avFrame->nb_samples);
+
+                        // Append only the samples actually produced.
+                        if (convertedSamples > 0)
+                        {
+                            output.insert(
+                                output.end(),
+                                converted.begin(),
+                                converted.begin() + convertedSamples * _outputChannelCount);
+                        }
+
+                        if (static_cast<int>(output.size()) >= totalNeeded)
+                        {
+                            decoding = 1;
+                            break;
+                        }
+                    }
+
+                    // EAGAIN means the decoder needs another packet, so the
+                    // outer loop continues; every other outcome ends it.
+                    if (AVERROR(EAGAIN) == decoding)
+                    {
+                        decoding = 0;
+                    }
+                    else if (AVERROR_EOF == decoding)
+                    {
+                        break;
+                    }
+                    else if (decoding < 0)
+                    {
+                        break;
+                    }
+                    else if (1 == decoding)
+                    {
+                        break;
+                    }
+                }
+                if (packet.p->buf)
+                {
+                    av_packet_unref(packet.p);
+                }
+            }
+            // The breaks above skip the unref at the end of the loop body.
+            if (packet.p->buf)
+            {
+                av_packet_unref(packet.p);
+            }
+        }
+
+        // Empty buffer description; the members keep their in-class defaults.
+        AudioRead::AVIOBufferData::AVIOBufferData() = default;
+
+        // Describe a block of memory of the given size. The data is
+        // referenced, not copied.
+        AudioRead::AVIOBufferData::AVIOBufferData(const uint8_t* data, size_t size) :
+            data(data),
+            size(size)
+        {}
+
+        // Read callback for the custom I/O context: copy up to bufSize bytes
+        // from the current offset of the in-memory data into buf and advance
+        // the offset. Returns the number of bytes copied, or AVERROR_EOF when
+        // nothing can be copied.
+        int AudioRead::_avIOBufferRead(void* opaque, uint8_t* buf, int bufSize)
+        {
+            AVIOBufferData* source = static_cast<AVIOBufferData*>(opaque);
+
+            // Serve at most the bytes left after the current offset; a
+            // negative request counts as zero.
+            const int64_t available = source->size - source->offset;
+            const int64_t requested = std::max(
+                static_cast<int64_t>(bufSize),
+                static_cast<int64_t>(0));
+            const int count = static_cast<int>(std::min(requested, available));
+            if (0 == count)
+            {
+                return AVERROR_EOF;
+            }
+
+            memcpy(buf, source->data + source->offset, count);
+            source->offset += count;
+
+            return count;
+        }
+
+        // Seek callback for the custom I/O context. A size query returns the
+        // length of the in-memory data; any other request moves the read
+        // offset to the given position, clamped to the data, and returns the
+        // requested offset.
+        //
+        // NOTE(review): the offset is always treated as absolute (SEEK_CUR
+        // and SEEK_END are not distinguished) and the value returned is the
+        // unclamped request - confirm this matches what libavformat passes.
+        int64_t AudioRead::_avIOBufferSeek(void* opaque, int64_t offset, int whence)
+        {
+            AVIOBufferData* source = static_cast<AVIOBufferData*>(opaque);
+
+            if (whence & AVSEEK_SIZE)
+            {
+                return source->size;
+            }
+
+            const int64_t lowerBounded = std::max(offset, static_cast<int64_t>(0));
+            const int64_t upperBound = static_cast<int64_t>(source->size);
+            source->offset = std::min(lowerBounded, upperBound);
+
+            return offset;
+        }
+    }
+}
diff --git a/lib/toucanRender/FFmpegAudioRead.h b/lib/toucanRender/FFmpegAudioRead.h
new file mode 100644
index 0000000..0ca77f6
--- /dev/null
+++ b/lib/toucanRender/FFmpegAudioRead.h
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Contributors to the toucan project.
+
+#pragma once
+
+#include <toucanRender/AudioBuffer.h>
+#include <toucanRender/FFmpeg.h>
+#include <toucanRender/MemoryMap.h>
+
+#include <opentimelineio/version.h>
+
+extern "C"
+{
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libswresample/swresample.h>
+
+} // extern "C"
+
+#include <filesystem>
+
+namespace toucan
+{
+    namespace ffmpeg
+    {
+        //! Read audio with FFmpeg, from a file or from a memory reference,
+        //! and convert it to float samples at a requested sample rate and
+        //! channel count.
+        class AudioRead : public std::enable_shared_from_this<AudioRead>
+        {
+        public:
+            //! Open the audio. The data is read from the memory reference
+            //! when it is valid, otherwise from the file path. Throws
+            //! std::runtime_error when the media cannot be opened or set up.
+            AudioRead(
+                const std::filesystem::path&,
+                int outputSampleRate,
+                int outputChannelCount,
+                const MemoryReference& = {});
+
+            virtual ~AudioRead();
+
+            //! Get whether an audio stream was found.
+            bool hasAudio() const;
+
+            //! Get the output sample rate.
+            int getSampleRate() const;
+
+            //! Get the output channel count.
+            int getChannelCount() const;
+
+            //! Get the time range of the audio. The range starts at zero and
+            //! uses the sample rate of the source stream as its rate.
+            const OTIO_NS::TimeRange& getTimeRange() const;
+
+            //! Get the given number of samples starting at the given time.
+            //! The buffer is zero filled where no audio is available.
+            AudioBuffer getAudio(
+                const OTIO_NS::RationalTime& time,
+                int sampleCount);
+
+        private:
+            void _seek(const OTIO_NS::RationalTime&);
+            void _decode(std::vector<float>& output, int sampleCount);
+
+            std::filesystem::path _path;
+            MemoryReference _memoryReference;
+            int _outputSampleRate = 0;
+            int _outputChannelCount = 0;
+            OTIO_NS::TimeRange _timeRange;
+            // Time at which the next sequential read continues; compared with
+            // the requested time to decide whether a seek is needed.
+            OTIO_NS::RationalTime _currentTime;
+
+            // Description of the in-memory data served by the I/O callbacks.
+            struct AVIOBufferData
+            {
+                AVIOBufferData();
+                AVIOBufferData(const uint8_t*, size_t size);
+
+                const uint8_t* data = nullptr;
+                size_t size = 0;
+                size_t offset = 0;
+            };
+            static int _avIOBufferRead(void* opaque, uint8_t* buf, int bufSize);
+            static int64_t _avIOBufferSeek(void* opaque, int64_t offset, int whence);
+
+            AVFormatContext* _avFormatContext = nullptr;
+            AVIOBufferData _avIOBufferData;
+            uint8_t* _avIOContextBuffer = nullptr;
+            AVIOContext* _avIOContext = nullptr;
+            // Index of the selected audio stream, -1 when there is none.
+            int _avStream = -1;
+            AVCodecParameters* _avCodecParameters = nullptr;
+            AVCodecContext* _avCodecContext = nullptr;
+            AVFrame* _avFrame = nullptr;
+            SwrContext* _swrContext = nullptr;
+            bool _eof = false;
+
+            // Converted samples that were decoded beyond the last request.
+            std::vector<float> _residual;
+        };
+    }
+}
diff --git a/lib/toucanRender/FFmpegAudioWrite.cpp b/lib/toucanRender/FFmpegAudioWrite.cpp
new file mode 100644
index 0000000..73ae808
--- /dev/null
+++ b/lib/toucanRender/FFmpegAudioWrite.cpp
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Contributors to the toucan project.
+
+#include "FFmpegAudioWrite.h"
+
+#include <iostream>
+#include <sstream>
+
+extern "C"
+{
+#include <libavutil/opt.h>
+#include <libavutil/channel_layout.h>
+}
+
+namespace toucan
+{
+    namespace ffmpeg
+    {
+        // Create an audio-only output file: set up the muxer and the encoder
+        // for the given codec, sample rate and channel count, write the file
+        // header, and allocate the packet, frame and (when needed) the sample
+        // format converter used by writeAudio(). Throws std::runtime_error
+        // when any FFmpeg step fails.
+        AudioWrite::AudioWrite(
+            const std::filesystem::path& path,
+            int sampleRate,
+            int channelCount,
+            AudioCodec audioCodec) :
+            _path(path),
+            _sampleRate(sampleRate),
+            _channelCount(channelCount)
+        {
+            av_log_set_level(AV_LOG_QUIET);
+
+            AVCodecID avCodecID = getAudioCodecId(audioCodec);
+
+            // No format is named explicitly; only the file name is supplied.
+            int r = avformat_alloc_output_context2(&_avFormatContext, NULL, NULL, _path.string().c_str());
+            if (r < 0)
+            {
+                throw std::runtime_error(getErrorLabel(r));
+            }
+            const AVCodec* avCodec = avcodec_find_encoder(avCodecID);
+            if (!avCodec)
+            {
+                throw std::runtime_error("Cannot find audio encoder");
+            }
+            _avCodecContext = avcodec_alloc_context3(avCodec);
+            if (!_avCodecContext)
+            {
+                throw std::runtime_error("Cannot allocate context");
+            }
+            _avAudioStream = avformat_new_stream(_avFormatContext, avCodec);
+            if (!_avAudioStream)
+            {
+                throw std::runtime_error("Cannot allocate stream");
+            }
+
+            _avCodecContext->codec_id = avCodec->id;
+            _avCodecContext->codec_type = AVMEDIA_TYPE_AUDIO;
+            _avCodecContext->sample_rate = sampleRate;
+            av_channel_layout_default(&_avCodecContext->ch_layout, channelCount);
+            // Use the first sample format the encoder lists, falling back to
+            // the format from the codec table when it lists none.
+            _avCodecContext->sample_fmt = avCodec->sample_fmts ?
+                avCodec->sample_fmts[0] : getAudioSampleFormat(audioCodec);
+            // One tick per sample, so timestamps are sample counts.
+            _avCodecContext->time_base = { 1, sampleRate };
+            if (_avFormatContext->oformat->flags & AVFMT_GLOBALHEADER)
+            {
+                _avCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+            }
+
+            r = avcodec_open2(_avCodecContext, avCodec, NULL);
+            if (r < 0)
+            {
+                throw std::runtime_error(getErrorLabel(r));
+            }
+
+            r = avcodec_parameters_from_context(_avAudioStream->codecpar, _avCodecContext);
+            if (r < 0)
+            {
+                throw std::runtime_error(getErrorLabel(r));
+            }
+
+            _avAudioStream->time_base = { 1, sampleRate };
+
+            r = avio_open(&_avFormatContext->pb, _path.string().c_str(), AVIO_FLAG_WRITE);
+            if (r < 0)
+            {
+                throw std::runtime_error(getErrorLabel(r));
+            }
+
+            r = avformat_write_header(_avFormatContext, NULL);
+            if (r < 0)
+            {
+                throw std::runtime_error(getErrorLabel(r));
+            }
+
+            _avPacket = av_packet_alloc();
+            if (!_avPacket)
+            {
+                throw std::runtime_error("Cannot allocate packet");
+            }
+
+            // Samples per frame sent to the encoder; 1024 is used when the
+            // encoder does not report a positive frame size.
+            _frameSize = _avCodecContext->frame_size;
+            if (_frameSize <= 0)
+            {
+                _frameSize = 1024;
+            }
+
+            // A single frame, sized for a full codec frame, is reused for
+            // every write.
+            _avFrame = av_frame_alloc();
+            if (!_avFrame)
+            {
+                throw std::runtime_error("Cannot allocate frame");
+            }
+            _avFrame->format = _avCodecContext->sample_fmt;
+            _avFrame->ch_layout = _avCodecContext->ch_layout;
+            _avFrame->sample_rate = sampleRate;
+            _avFrame->nb_samples = _frameSize;
+            r = av_frame_get_buffer(_avFrame, 0);
+            if (r < 0)
+            {
+                throw std::runtime_error(getErrorLabel(r));
+            }
+
+            // Input arrives as AV_SAMPLE_FMT_FLT; a converter is only needed
+            // when the encoder uses a different sample format. The sample
+            // rate is the same on both sides.
+            if (_avCodecContext->sample_fmt != AV_SAMPLE_FMT_FLT)
+            {
+                AVChannelLayout inLayout;
+                av_channel_layout_default(&inLayout, channelCount);
+
+                r = swr_alloc_set_opts2(
+                    &_swrContext,
+                    &_avCodecContext->ch_layout,
+                    _avCodecContext->sample_fmt,
+                    sampleRate,
+                    &inLayout,
+                    AV_SAMPLE_FMT_FLT,
+                    sampleRate,
+                    0,
+                    nullptr);
+                if (r < 0 || !_swrContext)
+                {
+                    throw std::runtime_error("Cannot allocate resampler context");
+                }
+                r = swr_init(_swrContext);
+                if (r < 0)
+                {
+                    throw std::runtime_error("Cannot initialize resampler");
+                }
+            }
+
+            // From here on the destructor flushes and writes the trailer.
+            _opened = true;
+        }
+
+        // Finish the file: encode the samples still queued, drain the
+        // encoder, write the trailer, and release every FFmpeg object.
+        AudioWrite::~AudioWrite()
+        {
+            if (_opened)
+            {
+                // Flushing and draining report failures by throwing. An
+                // exception leaving a destructor terminates the program and
+                // skips the cleanup below, so it is reported and contained
+                // here instead.
+                try
+                {
+                    _flushFifo();
+                    _encodeAudio(nullptr);
+                }
+                catch (const std::exception& e)
+                {
+                    std::cerr << "Cannot finalize audio: " << e.what() << '\n';
+                }
+                av_write_trailer(_avFormatContext);
+            }
+            if (_swrContext)
+            {
+                swr_free(&_swrContext);
+            }
+            if (_avFrame)
+            {
+                av_frame_free(&_avFrame);
+            }
+            if (_avPacket)
+            {
+                av_packet_free(&_avPacket);
+            }
+            if (_avCodecContext)
+            {
+                avcodec_free_context(&_avCodecContext);
+            }
+            if (_avFormatContext && _avFormatContext->pb)
+            {
+                avio_closep(&_avFormatContext->pb);
+            }
+            if (_avFormatContext)
+            {
+                avformat_free_context(_avFormatContext);
+            }
+        }
+
+        // Queue the samples of the buffer and encode every complete codec
+        // frame that the queue now holds. Samples that do not fill a frame
+        // stay queued for a later call.
+        void AudioWrite::writeAudio(const AudioBuffer& buffer)
+        {
+            _fifo.insert(_fifo.end(), buffer.data.begin(), buffer.data.end());
+
+            for (;;)
+            {
+                const int queuedSamples = static_cast<int>(_fifo.size()) / _channelCount;
+                if (queuedSamples < _frameSize)
+                {
+                    break;
+                }
+
+                av_frame_make_writable(_avFrame);
+                _avFrame->nb_samples = _frameSize;
+
+                // Number of floats making up one frame across all channels.
+                const int frameFloats = _frameSize * _channelCount;
+                if (!_swrContext)
+                {
+                    // Without a converter the floats are copied as they are.
+                    memcpy(
+                        _avFrame->data[0],
+                        _fifo.data(),
+                        frameFloats * sizeof(float));
+                }
+                else
+                {
+                    const uint8_t* input = reinterpret_cast<const uint8_t*>(_fifo.data());
+                    swr_convert(
+                        _swrContext,
+                        _avFrame->extended_data,
+                        _frameSize,
+                        &input,
+                        _frameSize);
+                }
+
+                _fifo.erase(_fifo.begin(), _fifo.begin() + frameFloats);
+
+                // Timestamps count samples written so far.
+                _avFrame->pts = _pts;
+                _pts += _frameSize;
+                _encodeAudio(_avFrame);
+            }
+        }
+
+        // Encode the samples left in the queue as one final, shorter frame.
+        // Does nothing when the queue holds less than one sample per channel.
+        void AudioWrite::_flushFifo()
+        {
+            const int pendingSamples = _fifo.empty() ?
+                0 :
+                static_cast<int>(_fifo.size()) / _channelCount;
+            if (pendingSamples <= 0)
+            {
+                return;
+            }
+
+            av_frame_make_writable(_avFrame);
+            _avFrame->nb_samples = pendingSamples;
+
+            if (!_swrContext)
+            {
+                // Without a converter the floats are copied as they are.
+                memcpy(
+                    _avFrame->data[0],
+                    _fifo.data(),
+                    pendingSamples * _channelCount * sizeof(float));
+            }
+            else
+            {
+                const uint8_t* input = reinterpret_cast<const uint8_t*>(_fifo.data());
+                swr_convert(
+                    _swrContext,
+                    _avFrame->extended_data,
+                    pendingSamples,
+                    &input,
+                    pendingSamples);
+            }
+
+            _fifo.clear();
+
+            _avFrame->pts = _pts;
+            _pts += pendingSamples;
+            _encodeAudio(_avFrame);
+        }
+
+        // Send a frame to the encoder and write every packet it produces to
+        // the file. A null frame drains the encoder. Throws
+        // std::runtime_error when encoding or writing fails.
+        void AudioWrite::_encodeAudio(AVFrame* frame)
+        {
+            int r = avcodec_send_frame(_avCodecContext, frame);
+            if (r < 0)
+            {
+                throw std::runtime_error(getErrorLabel(r));
+            }
+
+            while (r >= 0)
+            {
+                r = avcodec_receive_packet(_avCodecContext, _avPacket);
+                // EAGAIN: the encoder needs more input; EOF: it is drained.
+                if (r == AVERROR(EAGAIN) || r == AVERROR_EOF)
+                {
+                    return;
+                }
+                else if (r < 0)
+                {
+                    throw std::runtime_error(getErrorLabel(r));
+                }
+                // The muxer may have replaced the stream time base while
+                // writing the header, so convert the packet timestamps from
+                // the encoder time base to the one the stream actually uses.
+                av_packet_rescale_ts(
+                    _avPacket,
+                    _avCodecContext->time_base,
+                    _avAudioStream->time_base);
+                _avPacket->stream_index = _avAudioStream->index;
+                r = av_interleaved_write_frame(_avFormatContext, _avPacket);
+                if (r < 0)
+                {
+                    throw std::runtime_error(getErrorLabel(r));
+                }
+                av_packet_unref(_avPacket);
+            }
+        }
+    }
+}
diff --git a/lib/toucanRender/FFmpegAudioWrite.h b/lib/toucanRender/FFmpegAudioWrite.h
new file mode 100644
index 0000000..bf3da66
--- /dev/null
+++ b/lib/toucanRender/FFmpegAudioWrite.h
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright Contributors to the toucan project.
+
+#pragma once
+
+#include <toucanRender/AudioBuffer.h>
+#include <toucanRender/FFmpeg.h>
+
+#include <opentimelineio/version.h>
+
+extern "C"
+{
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libswresample/swresample.h>
+
+} // extern "C"
+
+#include <filesystem>
+#include <vector>
+
+namespace toucan
+{
+    namespace ffmpeg
+    {
+        //! Write float audio samples to an audio file with FFmpeg.
+        class AudioWrite : public std::enable_shared_from_this<AudioWrite>
+        {
+        public:
+            //! Create the file and write its header. Throws
+            //! std::runtime_error when the encoder or the file cannot be set
+            //! up.
+            AudioWrite(
+                const std::filesystem::path&,
+                int sampleRate,
+                int channelCount,
+                AudioCodec);
+
+            //! Encode the remaining samples and write the file trailer.
+            virtual ~AudioWrite();
+
+            //! Append the samples of the buffer. Samples are encoded in
+            //! frames of a fixed size; a remainder stays queued until more
+            //! samples arrive or the writer is destroyed.
+            void writeAudio(const AudioBuffer&);
+
+        private:
+            void _encodeAudio(AVFrame*);
+            void _flushFifo();
+
+            std::filesystem::path _path;
+            int _sampleRate = 0;
+            int _channelCount = 0;
+            AVFormatContext* _avFormatContext = nullptr;
+            AVCodecContext* _avCodecContext = nullptr;
+            AVStream* _avAudioStream = nullptr;
+            AVPacket* _avPacket = nullptr;
+            AVFrame* _avFrame = nullptr;
+            // Only created when the encoder does not take AV_SAMPLE_FMT_FLT.
+            SwrContext* _swrContext = nullptr;
+            // Timestamp of the next frame, counted in samples.
+            int64_t _pts = 0;
+            // Samples per channel in each frame sent to the encoder.
+            int _frameSize = 0;
+            // Samples waiting to be encoded.
+            std::vector<float> _fifo;
+            // Set once the constructor has completed.
+            bool _opened = false;
+        };
+    }
+}
diff --git a/lib/toucanRender/FFmpegWrite.cpp b/lib/toucanRender/FFmpegWrite.cpp
index b1942b4..476e9e8 100644
--- a/lib/toucanRender/FFmpegWrite.cpp
+++ b/lib/toucanRender/FFmpegWrite.cpp
@@ -14,6 +14,7 @@ extern "C"
 {
 #include <libavutil/imgutils.h>
 #include <libavutil/opt.h>
+#include <libavutil/channel_layout.h>
 }
 
 namespace toucan
@@ -24,10 +25,15 @@ namespace toucan
             const std::filesystem::path& path,
             const OIIO::ImageSpec& spec,
             const OTIO_NS::TimeRange& timeRange,
-            VideoCodec videoCodec) :
+            VideoCodec videoCodec,
+            int audioSampleRate,
+            int audioChannelCount,
+            AudioCodec audioCodec) :
             _path(path),
             _spec(spec),
-            _timeRange(timeRange)
+            _timeRange(timeRange),
+            _audioSampleRate(audioSampleRate),
+            _audioChannelCount(audioChannelCount)
         {
             av_log_set_level(AV_LOG_QUIET);
             //av_log_set_level(AV_LOG_VERBOSE);
@@ -93,6 +99,105 @@ namespace toucan
             _avVideoStream->time_base = { rational.second, rational.first };
             _avVideoStream->avg_frame_rate = { rational.first, rational.second };
 
+            if (audioSampleRate > 0 && audioChannelCount > 0) // Audio is optional: a zero rate or channel count means no audio stream is created.
+            {
+                AVCodecID audioCodecId = getAudioCodecId(audioCodec);
+                const AVCodec* audioAvCodec = avcodec_find_encoder(audioCodecId); // Encoder for the requested codec.
+                if (!audioAvCodec)
+                {
+                    throw std::runtime_error("Cannot find audio encoder");
+                }
+                _avAudioCodecContext = avcodec_alloc_context3(audioAvCodec);
+                if (!_avAudioCodecContext)
+                {
+                    throw std::runtime_error("Cannot allocate audio context");
+                }
+                _avAudioStream = avformat_new_stream(_avFormatContext, audioAvCodec); // Audio stream added to the output container.
+                if (!_avAudioStream)
+                {
+                    throw std::runtime_error("Cannot allocate audio stream");
+                }
+
+                _avAudioCodecContext->codec_id = audioAvCodec->id;
+                _avAudioCodecContext->codec_type = AVMEDIA_TYPE_AUDIO;
+                _avAudioCodecContext->sample_rate = audioSampleRate;
+                av_channel_layout_default(&_avAudioCodecContext->ch_layout, audioChannelCount); // Default layout for this channel count.
+                _avAudioCodecContext->sample_fmt = audioAvCodec->sample_fmts ? // Prefer the encoder's first listed sample format,
+                    audioAvCodec->sample_fmts[0] : getAudioSampleFormat(audioCodec); // otherwise fall back to getAudioSampleFormat().
+                _avAudioCodecContext->time_base = { 1, audioSampleRate }; // Encoder timestamps are counted in samples.
+                if (_avFormatContext->oformat->flags & AVFMT_GLOBALHEADER)
+                {
+                    _avAudioCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+                }
+
+                r = avcodec_open2(_avAudioCodecContext, audioAvCodec, NULL);
+                if (r < 0)
+                {
+                    throw std::runtime_error(getErrorLabel(r));
+                }
+
+                r = avcodec_parameters_from_context(_avAudioStream->codecpar, _avAudioCodecContext); // Copy the opened encoder's parameters to the stream.
+                if (r < 0)
+                {
+                    throw std::runtime_error(getErrorLabel(r));
+                }
+
+                _avAudioStream->time_base = { 1, audioSampleRate }; // NOTE(review): the muxer may replace this when the header is written; confirm packets are rescaled.
+
+                _audioFrameSize = _avAudioCodecContext->frame_size; // Samples per channel in each frame handed to the encoder.
+                if (_audioFrameSize <= 0) // The encoder reported no frame size: use 1024-sample frames.
+                {
+                    _audioFrameSize = 1024;
+                }
+
+                _avAudioPacket = av_packet_alloc(); // One packet, reused for every encoded packet.
+                if (!_avAudioPacket)
+                {
+                    throw std::runtime_error("Cannot allocate audio packet");
+                }
+
+                _avAudioFrame = av_frame_alloc(); // One frame, refilled for every chunk sent to the encoder.
+                if (!_avAudioFrame)
+                {
+                    throw std::runtime_error("Cannot allocate audio frame");
+                }
+                _avAudioFrame->format = _avAudioCodecContext->sample_fmt;
+                _avAudioFrame->ch_layout = _avAudioCodecContext->ch_layout; // NOTE(review): plain struct copy; FFmpeg documents av_channel_layout_copy() for this. Confirm it is safe for the layouts used.
+                _avAudioFrame->sample_rate = audioSampleRate;
+                _avAudioFrame->nb_samples = _audioFrameSize;
+                r = av_frame_get_buffer(_avAudioFrame, 0); // Allocates room for _audioFrameSize samples per channel.
+                if (r < 0)
+                {
+                    throw std::runtime_error(getErrorLabel(r));
+                }
+
+                if (_avAudioCodecContext->sample_fmt != AV_SAMPLE_FMT_FLT) // Input is interleaved float; a converter is only needed for other encoder formats.
+                {
+                    AVChannelLayout inLayout;
+                    av_channel_layout_default(&inLayout, audioChannelCount);
+
+                    r = swr_alloc_set_opts2( // Same rate and layout on both sides: only the sample format changes.
+                        &_swrContext,
+                        &_avAudioCodecContext->ch_layout,
+                        _avAudioCodecContext->sample_fmt,
+                        audioSampleRate,
+                        &inLayout,
+                        AV_SAMPLE_FMT_FLT,
+                        audioSampleRate,
+                        0,
+                        nullptr);
+                    if (r < 0 || !_swrContext)
+                    {
+                        throw std::runtime_error("Cannot allocate resampler context");
+                    }
+                    r = swr_init(_swrContext);
+                    if (r < 0)
+                    {
+                        throw std::runtime_error("Cannot initialize resampler");
+                    }
+                }
+            }
+
             //av_dump_format(_avFormatContext, 0, _path.string().c_str(), 1);
 
             r = avio_open(&_avFormatContext->pb, _path.string().c_str(), AVIO_FLAG_WRITE);
@@ -141,8 +246,29 @@ namespace toucan
             if (_opened)
             {
                 _encodeVideo(nullptr);
+                if (_avAudioCodecContext) // Only when an audio stream was configured.
+                {
+                    _flushAudioFifo(); // Encode the samples still queued as one final frame.
+                    _encodeAudio(nullptr); // nullptr drains the encoder. NOTE(review): both calls can throw std::runtime_error; if this is the destructor (signature not visible here) that would terminate the program. Confirm.
+                }
                 av_write_trailer(_avFormatContext);
             }
+            if (_swrContext) // Release the audio resources; each pointer is null when it was never allocated.
+            {
+                swr_free(&_swrContext);
+            }
+            if (_avAudioFrame)
+            {
+                av_frame_free(&_avAudioFrame);
+            }
+            if (_avAudioPacket)
+            {
+                av_packet_free(&_avAudioPacket);
+            }
+            if (_avAudioCodecContext)
+            {
+                avcodec_free_context(&_avAudioCodecContext);
+            }
             if (_swsContext)
             {
                 sws_freeContext(_swsContext);
@@ -299,6 +425,44 @@ namespace toucan
             _encodeVideo(_avFrame);
         }
 
+        void Write::writeAudio(const AudioBuffer& buffer)
+        {
+            // Queue the buffer's float samples (assumed interleaved with _audioChannelCount channels;
+            // TODO confirm against callers) and encode every complete frame. Throws std::runtime_error.
+            if (!_avAudioCodecContext) return; // No audio stream was configured.
+
+            _audioFifo.insert(_audioFifo.end(), buffer.data.begin(), buffer.data.end());
+
+            const int frameValues = _audioFrameSize * _audioChannelCount; // Floats in one full frame.
+            while (static_cast<int>(_audioFifo.size()) >= frameValues)
+            {
+                int r = av_frame_make_writable(_avAudioFrame); // The encoder may still reference the previous buffer.
+                if (r < 0)
+                {
+                    throw std::runtime_error(getErrorLabel(r));
+                }
+                _avAudioFrame->nb_samples = _audioFrameSize;
+                if (_swrContext) // Convert interleaved float to the encoder's sample format.
+                {
+                    const uint8_t* inBuf = reinterpret_cast<const uint8_t*>(_audioFifo.data());
+                    r = swr_convert(
+                        _swrContext, _avAudioFrame->extended_data, _audioFrameSize, &inBuf, _audioFrameSize);
+                    if (r < 0)
+                    {
+                        throw std::runtime_error(getErrorLabel(r));
+                    }
+                }
+                else // The encoder takes interleaved float directly.
+                {
+                    memcpy(_avAudioFrame->data[0], _audioFifo.data(), frameValues * sizeof(float));
+                }
+                _audioFifo.erase(_audioFifo.begin(), _audioFifo.begin() + frameValues);
+                _avAudioFrame->pts = _audioPts; // Presentation time in samples.
+                _audioPts += _audioFrameSize;
+                _encodeAudio(_avAudioFrame);
+            }
+        }
+
         void Write::_encodeVideo(AVFrame* frame)
         {
             int r = avcodec_send_frame(_avCodecContext, frame);
@@ -326,5 +490,69 @@ namespace toucan
                 av_packet_unref(_avPacket);
             }
         }
+
+        void Write::_encodeAudio(AVFrame* frame)
+        {
+            // Submit one frame (nullptr starts draining the encoder) and mux every packet that is ready.
+            // Throws std::runtime_error when the encoder or the muxer reports a failure.
+            int r = avcodec_send_frame(_avAudioCodecContext, frame);
+            if (r < 0)
+            {
+                throw std::runtime_error(getErrorLabel(r));
+            }
+            for (;;)
+            {
+                r = avcodec_receive_packet(_avAudioCodecContext, _avAudioPacket);
+                if (r < 0)
+                {
+                    if (AVERROR(EAGAIN) == r || AVERROR_EOF == r) return; // More input needed, or fully drained.
+                    throw std::runtime_error(getErrorLabel(r));
+                }
+                // Packet times are in the codec time base; the muxer may have chosen another one for the stream.
+                av_packet_rescale_ts(_avAudioPacket, _avAudioCodecContext->time_base, _avAudioStream->time_base);
+                _avAudioPacket->stream_index = _avAudioStream->index;
+                r = av_interleaved_write_frame(_avFormatContext, _avAudioPacket);
+                if (r < 0)
+                {
+                    throw std::runtime_error(getErrorLabel(r));
+                }
+                av_packet_unref(_avAudioPacket);
+            }
+        }
+
+        void Write::_flushAudioFifo()
+        {
+            // Encode what is left in the FIFO as one final, possibly short, frame. Throws std::runtime_error.
+            const int queuedSamples = static_cast<int>(_audioFifo.size()) / _audioChannelCount;
+            // Never exceed the buffer allocated for _avAudioFrame (_audioFrameSize samples per channel).
+            const int remainingSamples = queuedSamples < _audioFrameSize ? queuedSamples : _audioFrameSize;
+            if (remainingSamples <= 0) return;
+
+            int r = av_frame_make_writable(_avAudioFrame);
+            if (r < 0)
+            {
+                throw std::runtime_error(getErrorLabel(r));
+            }
+            _avAudioFrame->nb_samples = remainingSamples;
+            if (_swrContext) // Convert interleaved float to the encoder's sample format.
+            {
+                const uint8_t* inBuf = reinterpret_cast<const uint8_t*>(_audioFifo.data());
+                r = swr_convert(
+                    _swrContext, _avAudioFrame->extended_data, remainingSamples, &inBuf, remainingSamples);
+                if (r < 0)
+                {
+                    throw std::runtime_error(getErrorLabel(r));
+                }
+            }
+            else // The encoder takes interleaved float directly.
+            {
+                memcpy(_avAudioFrame->data[0], _audioFifo.data(), remainingSamples * _audioChannelCount * sizeof(float));
+            }
+
+            _audioFifo.clear();
+            _avAudioFrame->pts = _audioPts;
+            _audioPts += remainingSamples;
+            _encodeAudio(_avAudioFrame);
+        }
     }
 }
diff --git a/lib/toucanRender/FFmpegWrite.h b/lib/toucanRender/FFmpegWrite.h
index 13f797c..5f88dc7 100644
--- a/lib/toucanRender/FFmpegWrite.h
+++ b/lib/toucanRender/FFmpegWrite.h
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include <toucanRender/AudioBuffer.h>
 #include <toucanRender/FFmpeg.h>
 
 #include <opentimelineio/version.h>
@@ -12,11 +13,13 @@
 extern "C"
 {
 #include <libavformat/avformat.h>
+#include <libswresample/swresample.h>
 #include <libswscale/swscale.h>
 
 } // extern "C"
 
 #include <filesystem>
+#include <vector>
 
 namespace toucan
 {
@@ -29,14 +32,20 @@ namespace toucan
                 const std::filesystem::path&,
                 const OIIO::ImageSpec&,
                 const OTIO_NS::TimeRange&,
-                VideoCodec);
+                VideoCodec,
+                int audioSampleRate = 0,
+                int audioChannelCount = 0,
+                AudioCodec audioCodec = AudioCodec::PCM_S16LE);
 
             virtual ~Write();
 
             void writeImage(const OIIO::ImageBuf&, const OTIO_NS::RationalTime&);
+            void writeAudio(const AudioBuffer&);
 
         private:
             void _encodeVideo(AVFrame*);
+            void _encodeAudio(AVFrame*);
+            void _flushAudioFifo();
 
             std::filesystem::path _path;
             OIIO::ImageSpec _spec;
@@ -50,6 +59,17 @@ namespace toucan
             AVFrame* _avFrame2 = nullptr;
             SwsContext* _swsContext = nullptr;
             bool _opened = false;
+
+            AVCodecContext* _avAudioCodecContext = nullptr; // Audio encoder; stays null when audio is disabled.
+            AVStream* _avAudioStream = nullptr; // Audio stream inside the output container.
+            AVPacket* _avAudioPacket = nullptr; // Reused for every encoded audio packet.
+            AVFrame* _avAudioFrame = nullptr; // Reused frame with room for _audioFrameSize samples per channel.
+            SwrContext* _swrContext = nullptr; // Float-to-encoder-format converter; null when the encoder takes float.
+            int64_t _audioPts = 0; // Timestamp of the next audio frame, counted in samples.
+            int _audioSampleRate = 0;
+            int _audioChannelCount = 0;
+            int _audioFrameSize = 0; // Samples per channel per encoded frame.
+            std::vector<float> _audioFifo; // Interleaved float samples waiting to fill a frame.
         };
     }
 }
diff --git a/lib/toucanRender/TimelineAlgo.cpp b/lib/toucanRender/TimelineAlgo.cpp
index 81207d4..0b5996f 100644
--- a/lib/toucanRender/TimelineAlgo.cpp
+++ b/lib/toucanRender/TimelineAlgo.cpp
@@ -24,4 +24,22 @@ namespace toucan
         }
         return out;
     }
+
+    std::vector<OTIO_NS::SerializableObject::Retainer<OTIO_NS::Clip> >
+        getAudioClips(const OTIO_NS::SerializableObject::Retainer<OTIO_NS::Timeline>& timeline)
+    {
+        // Gather the clips of every audio track, visiting the timeline's tracks in order.
+        std::vector<OTIO_NS::SerializableObject::Retainer<OTIO_NS::Clip> > audioClips;
+        for (const auto& item : timeline->tracks()->children())
+        {
+            auto audioTrack = OTIO_NS::dynamic_retainer_cast<OTIO_NS::Track>(item);
+            if (!audioTrack || audioTrack->kind() != OTIO_NS::Track::Kind::audio)
+            {
+                continue; // Not a track, or not an audio track.
+            }
+            const auto found = audioTrack->find_clips(nullptr, std::nullopt, true);
+            audioClips.insert(audioClips.end(), found.begin(), found.end());
+        }
+        return audioClips;
+    }
 }
diff --git a/lib/toucanRender/TimelineAlgo.h b/lib/toucanRender/TimelineAlgo.h
index b47a79e..c24ad87 100644
--- a/lib/toucanRender/TimelineAlgo.h
+++ b/lib/toucanRender/TimelineAlgo.h
@@ -10,4 +10,8 @@ namespace toucan
     //! Get the video clips in a timeline.
     std::vector<OTIO_NS::SerializableObject::Retainer<OTIO_NS::Clip> >
         getVideoClips(const OTIO_NS::SerializableObject::Retainer<OTIO_NS::Timeline>&);
+
+    //! Get the clips found on the audio tracks of a timeline, in track order.
+    std::vector<OTIO_NS::SerializableObject::Retainer<OTIO_NS::Clip> >
+        getAudioClips(const OTIO_NS::SerializableObject::Retainer<OTIO_NS::Timeline>&);
 }