Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 140 additions & 61 deletions sdk_v2/cpp/src/ep_detection/cuda_ep_bootstrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,26 @@
// Licensed under the MIT License.
#include "ep_detection/cuda_ep_bootstrapper.h"

#include "http/http_client.h"
#include "http/http_download.h"
#include "logger.h"
#include "util/file_lock.h"
#include "http/http_download.h"
#include "util/sha256.h"
#include "util/zip_extract.h"

#include <fmt/format.h>
#include <nlohmann/json.hpp>

#include <algorithm>
#include <atomic>
#include <cctype>
#include <cstdio>
#include <filesystem>
#include <map>
#include <optional>
#include <stdexcept>
#include <string>
#include <unordered_map>

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
Expand All @@ -25,46 +32,93 @@ namespace {

constexpr const char* kPackageFileName = "cuda-ep.zip";
constexpr const char* kLockFileName = "cuda-ep.lock";
constexpr const char* kStagingDirName = "cuda-ep-staging";
constexpr const char* kUserAgent = "FoundryLocal";
constexpr int kMaxInstallAttempts = 5;

// CUDA EP package is built against the ONNX Runtime version we link against, so
// WinML and non-WinML builds need separate downloads. Hashes mirror the C# core
// (see neutron.main/src/Service/Providers/Detector/CudaEpBootstrapper.cs).
// WinML build -> ORT 1.23.2 (cuda-ep-20260501-182408.zip)
// Non-WinML -> ORT 1.25.1 (cuda-ep-20260501-062935.zip)
#if defined(FOUNDRY_LOCAL_USE_WINML) && FOUNDRY_LOCAL_USE_WINML
constexpr const char* kDownloadUrl =
"https://foundrypackages-ffhrdhbxb7gpdreh.b02.azurefd.net/cuda-ep-20260501-182408.zip";
#else
constexpr const char* kDownloadUrl =
"https://foundrypackages-ffhrdhbxb7gpdreh.b02.azurefd.net/cuda-ep-20260501-062935.zip";
#endif

struct ExpectedBinary {
const char* filename;
const char* sha256;
// Manifest URL on the CDN — published by the CUDA EP upload pipeline.
constexpr const char* kManifestUrl =
"https://foundrypackages-ffhrdhbxb7gpdreh.b02.azurefd.net/cuda_ep_prod.json";

// -----------------------------------------------------------------------
// Platform detection
//
// Returns the manifest platform key and ORT registration library filename
// for the current build target, or std::nullopt if unsupported.
//
// To add a platform:
// 1. Uncomment its #elif block below.
// 2. Uncomment its entry in $binaryNames / $expectedPlatforms in
// cuda-ep-upload.yml and update $platformPattern there too.
// -----------------------------------------------------------------------
/// Describes one supported build target for the CUDA EP package.
struct PlatformInfo {
  /// Manifest lookup key, e.g. "win-x64".
  const char* key;

  /// Filename of the ORT registration library.
  const char* ep_lib;
};

#if defined(FOUNDRY_LOCAL_USE_WINML) && FOUNDRY_LOCAL_USE_WINML
constexpr ExpectedBinary kExpectedBinaries[] = {
{"onnxruntime_providers_cuda.dll", "4CEF18654878CEFCFCF8488E9C3A705EB5327AA9B5556155C319C9CBB2D98FCF"},
{"onnxruntime-genai-cuda.dll", "BC953F8E2AAFC6219B2D723B65AB8F1A9426A6B7724D6A01ED756FAE8C3DE6AE"},
};
std::optional<PlatformInfo> GetPlatformInfo() {
#if defined(_WIN32) && !defined(_M_ARM64)
return PlatformInfo{"win-x64", "onnxruntime_providers_cuda.dll"};

// Uncomment when win-arm64 CUDA EP build is available (see cuda-ep-upload.yml):
// #elif defined(_WIN32) && defined(_M_ARM64)
// return PlatformInfo{"win-arm64", "onnxruntime_providers_cuda.dll"};

// Uncomment when linux-x64 CUDA EP build is available (see cuda-ep-upload.yml):
// #elif defined(__linux__) && defined(__x86_64__)
// return PlatformInfo{"linux-x64", "libonnxruntime_providers_cuda.so"};

// Uncomment when linux-arm64 CUDA EP build is available (see cuda-ep-upload.yml):
// #elif defined(__linux__) && defined(__aarch64__)
// return PlatformInfo{"linux-arm64", "libonnxruntime_providers_cuda.so"};

#else
constexpr ExpectedBinary kExpectedBinaries[] = {
{"onnxruntime_providers_cuda.dll", "DD540FCFECFBC68B4675C9ADF09C2858CF6B054563859D79598AA2524406A76F"},
{"onnxruntime-genai-cuda.dll", "BC953F8E2AAFC6219B2D723B65AB8F1A9426A6B7724D6A01ED756FAE8C3DE6AE"},
};
return std::nullopt; // Platform not yet supported — graceful no-op.
#endif
}

constexpr const char* kRegistrationName = "Foundry.CUDA";
constexpr const char* kCudaProviderDll = "onnxruntime_providers_cuda.dll";

/// Manifest data for a single platform package.
struct ManifestInfo {
  /// Package version string.
  std::string version;

  /// URL the package archive is downloaded from.
  std::string download_url;

  /// Expected SHA256 per packaged file: filename -> expected hash.
  std::unordered_map<std::string, std::string> sha256;
};

/// Fetch and parse the CUDA EP manifest from the CDN.
///
/// Downloads kManifestUrl, reads the top-level "version" field, and returns the
/// "url" and "sha256" data of the "packages" entry matching `platform_key`.
///
/// The manifest is validated before it is returned so that a truncated or
/// malformed document is rejected up front: an empty hash table would leave
/// package verification with nothing to check, and a hash that is not a
/// 64-digit hex string can never match a computed SHA256.
///
/// @param platform_key  Manifest platform key to look up, e.g. "win-x64".
/// @param logger        Receives a debug line naming the manifest URL.
/// @return The package entry for the given platform key.
/// @throws std::runtime_error if the platform has no entry, the download URL
///         is empty, or the "sha256" table is not a non-empty object of
///         filename -> 64-hex-digit hash.
/// @throws nlohmann::json exceptions on invalid JSON or missing/mistyped fields.
ManifestInfo FetchManifest(const char* platform_key, fl::ILogger& logger) {
  logger.Log(fl::LogLevel::Debug,
             fmt::format("CUDA EP: fetching manifest from {}", kManifestUrl));

  const auto body = fl::http::HttpGetWithRetry(kManifestUrl, kUserAgent, logger);
  const auto j = nlohmann::json::parse(body);

  ManifestInfo info;
  info.version = j.at("version").get<std::string>();

  const auto& packages = j.at("packages");
  if (!packages.contains(platform_key)) {
    throw std::runtime_error(
        fmt::format("CUDA EP manifest has no entry for platform '{}'", platform_key));
  }

  const auto& pkg = packages.at(platform_key);
  info.download_url = pkg.at("url").get<std::string>();
  if (info.download_url.empty()) {
    throw std::runtime_error(
        fmt::format("CUDA EP manifest has an empty download URL for platform '{}'",
                    platform_key));
  }

  // An empty (or non-object) hash table must not be accepted: verification
  // iterates over these entries, so zero entries would verify nothing.
  const auto& hashes = pkg.at("sha256");
  if (!hashes.is_object() || hashes.empty()) {
    throw std::runtime_error(
        fmt::format("CUDA EP manifest has no SHA256 entries for platform '{}'",
                    platform_key));
  }

  for (const auto& [filename, hash] : hashes.items()) {
    const auto digest = hash.get<std::string>();

    // A SHA256 digest is 32 bytes, i.e. exactly 64 hex digits (either case).
    const bool is_hex_digest =
        digest.size() == 64 &&
        std::all_of(digest.begin(), digest.end(),
                    [](unsigned char c) { return std::isxdigit(c) != 0; });

    if (filename.empty() || !is_hex_digest) {
      throw std::runtime_error(
          fmt::format("CUDA EP manifest has an invalid SHA256 entry for '{}'", filename));
    }

    info.sha256[filename] = digest;
  }

  return info;
}

/// Verify all expected binaries exist and have correct SHA256 hashes.
bool VerifyPackage(const std::filesystem::path& dir, fl::ILogger& logger) {
for (const auto& expected : kExpectedBinaries) {
auto file_path = dir / expected.filename;
bool VerifyPackage(const std::filesystem::path& dir,
const std::unordered_map<std::string, std::string>& expected_hashes,
fl::ILogger& logger) {
for (const auto& [filename, expected_hash] : expected_hashes) {
auto file_path = dir / filename;

if (!std::filesystem::exists(file_path)) {
return false;
Expand All @@ -73,12 +127,11 @@ bool VerifyPackage(const std::filesystem::path& dir, fl::ILogger& logger) {
auto hash = fl::Sha256File(file_path);

// Case-insensitive comparison
std::string expected_hash(expected.sha256);
if (!std::equal(hash.begin(), hash.end(), expected_hash.begin(), expected_hash.end(),
[](char a, char b) { return std::toupper(a) == std::toupper(b); })) {
logger.Log(fl::LogLevel::Warning,
fmt::format("CUDA EP: hash mismatch for {}: got {}, expected {}",
expected.filename, hash, expected.sha256));
filename, hash, expected_hash));
return false;
}
}
Expand Down Expand Up @@ -118,69 +171,96 @@ bool CudaEpBootstrapper::DownloadAndRegister(bool force,

attempts_++;

// Bail out early if this build target has no supported CUDA EP platform.
auto platform_info = GetPlatformInfo();
if (!platform_info) {
logger.Log(LogLevel::Information, "CUDA EP: current platform is not yet supported");
return false;
}

auto ep_dir = std::filesystem::path(ep_dir_);
auto lock_path = ep_dir.parent_path() / kLockFileName;
auto zip_path = ep_dir.parent_path() / kPackageFileName;
auto parent_dir = ep_dir.parent_path();

try {
// Cross-process lock to prevent concurrent installs
FileLock lock(lock_path);
// Fetch the manifest before acquiring the lock to avoid holding it during network I/O.
auto manifest = FetchManifest(platform_info->key, logger);
logger.Log(LogLevel::Information,
fmt::format("CUDA EP: manifest fetched (version={}, platform={})",
manifest.version, platform_info->key));

// Check if package already exists and is valid
if (VerifyPackage(ep_dir, logger)) {
// Cross-process lock to prevent concurrent installs.
std::filesystem::create_directories(parent_dir);
FileLock lock(parent_dir / kLockFileName);

// Re-check after acquiring the lock — another process may have already updated.
if (!force && VerifyPackage(ep_dir, manifest.sha256, logger)) {
logger.Log(LogLevel::Information, "CUDA EP: package already valid, skipping download");
} else {
// Clean up any partial install
if (std::filesystem::exists(ep_dir)) {
std::filesystem::remove_all(ep_dir);
// Download to a staging directory so a failure never corrupts the existing install.
auto staging_dir = parent_dir / kStagingDirName;
if (std::filesystem::exists(staging_dir)) {
std::filesystem::remove_all(staging_dir);
}
std::filesystem::create_directories(staging_dir);

std::filesystem::create_directories(ep_dir);
auto zip_path = staging_dir / kPackageFileName;

// Download
logger.Log(LogLevel::Information, "CUDA EP: downloading from CDN...");
logger.Log(LogLevel::Information,
fmt::format("CUDA EP: downloading for {}...", platform_info->key));
logger.Log(LogLevel::Debug,
fmt::format("CUDA EP: download URL is {}", manifest.download_url));

// Bridge callback-based cancellation to the atomic flag HttpDownloadFile expects
std::atomic<bool> cancel_flag{false};

auto download_progress = [&](float pct) {
if (progress_cb) {
// 0-80% for download phase
// 0-80% for the download phase.
if (!progress_cb(name_, pct * 0.8f)) {
cancel_flag.store(true);
}
}
};

if (!HttpDownloadFile(kDownloadUrl, zip_path, kUserAgent,
if (!HttpDownloadFile(manifest.download_url, zip_path, kUserAgent,
&cancel_flag, download_progress, logger)) {
logger.Log(LogLevel::Warning, "CUDA EP: download failed (see prior log for details)");
std::filesystem::remove_all(staging_dir);
return false;
}

// Extract
logger.Log(LogLevel::Information, "CUDA EP: extracting...");
logger.Log(LogLevel::Information,
fmt::format("CUDA EP: extracting package to {}", staging_dir.string()));

if (!ExtractZip(zip_path, ep_dir, logger)) {
if (!ExtractZip(zip_path, staging_dir, logger)) {
logger.Log(LogLevel::Warning, "CUDA EP: extraction failed");
std::filesystem::remove_all(staging_dir);
return false;
}

// Clean up zip
std::filesystem::remove(zip_path);

// Verify
if (!VerifyPackage(ep_dir, logger)) {
logger.Log(LogLevel::Warning, "CUDA EP: verification failed after download");
if (!VerifyPackage(staging_dir, manifest.sha256, logger)) {
logger.Log(LogLevel::Warning, "CUDA EP: verification failed after extraction");
std::filesystem::remove_all(staging_dir);
return false;
}

logger.Log(LogLevel::Debug,
fmt::format("CUDA EP: staging verification succeeded, promoting to {}",
ep_dir.string()));

// Promote staging: delete the old install, then rename staging to target.
// Note: this is two separate steps, not a single atomic operation.
if (std::filesystem::exists(ep_dir)) {
std::filesystem::remove_all(ep_dir);
}
std::filesystem::rename(staging_dir, ep_dir);
logger.Log(LogLevel::Information, "CUDA EP: successfully installed.");
}

if (progress_cb) {
progress_cb(name_, 90.0f);
}

// Register with ORT
// Register with ORT.
#ifdef _WIN32
// Permanently prepend the EP directory to PATH. The zip bundles all
// required CUDA/cuDNN DLLs, so no system CUDA install is needed.
Expand All @@ -202,9 +282,9 @@ bool CudaEpBootstrapper::DownloadAndRegister(bool force,
}
#endif

auto cuda_dll_path = ep_dir / kCudaProviderDll;
auto cuda_lib_path = ep_dir / platform_info->ep_lib;

if (!register_ep_(kRegistrationName, cuda_dll_path)) {
if (!register_ep_(kRegistrationName, cuda_lib_path)) {
logger.Log(LogLevel::Warning, "CUDA EP: ORT registration failed");
return false;
}
Expand All @@ -215,10 +295,9 @@ bool CudaEpBootstrapper::DownloadAndRegister(bool force,
progress_cb(name_, 100.0f);
}

// Bootstrapper-side log — captures the install dir, which the central
// register_ep callback (logs library + version) doesn't have.
logger.Log(LogLevel::Information,
fmt::format("CUDA EP: ready (install_path={})", ep_dir.string()));
fmt::format("CUDA EP: ready (install_path={}, version={})",
ep_dir.string(), manifest.version));
return true;
} catch (const std::exception& e) {
logger.Log(LogLevel::Warning, fmt::format("CUDA EP: error: {}", e.what()));
Expand Down