Sunday, May 17, 2009

Comparing Version Numbers

Comparing version numbers is one of those operations that seems like it ought to be simple but can have a fair bit of complexity under the surface. How would you write an algorithm that gives the correct result for all of the following?

  • 2.10 comes after 2.9
  • 2.11-beta comes after 2.11-alpha
  • 2.11 comes after 2.11-beta
  • 7 comes after 2003
  • the revision that was checked in on 1/30/2009 and never given a version number comes after the revision that was checked in on 8/1/2008 and never given a version number

The Debian Project deals with more version numbers than anyone - over 30,000 packages over more than fifteen years, each of which must be separately versioned and tracked for upgrade and depedency purposes - so it's no surprise that they've developed a policy and algorithm that can handle all of the above cases. Debian's algorithm neatly handles cases like 2.9 versus 2.10, and it has two main additions over other version comparison algorithms: it states that ~ is sorted before anything, even the empty string, so 2.11~beta comes before 2.11, and it permits the addition of an epoch to "reset" version numbers, so 2003, which has the default epoch of 0, comes before 1:7, which has an epoch of 1. All of this makes Debian's algorithm a good approach for comparing version numbers even if you're not a Debian package maintainer.

I recently needed to compare some Debian version numbers, but the only C or C++ implementations of this algorithm that I could find were GPL'ed, and my target environment wouldn't permit me to simply shell out to dpkg --compare-versions, so I wrote my own implementation. Here it is, under the public domain. Note that, although it's written as a C++ class, the method bodies are in C style to make it easier to reuse them in a non-C++ project.

DebVersion.h:

#ifndef DEBVERSIONH
#define DEBVERSIONH

#include <stdlib.h>
#include <iostream>
#include <boost/operators.hpp>

class DebVersion : public boost::totally_ordered<DebVersion> {
public:
  DebVersion() : mEpoch(0), mUpstream(NULL), mRevision(NULL) {}
  DebVersion(const DebVersion& other) { *this = other; }
  explicit DebVersion(const char *version);
  ~DebVersion();

  int Epoch() const { return mEpoch; }
  const char *Upstream() const { return mUpstream; }
  const char *Revision() const { return mRevision; }

  void Epoch(int new_epoch) { mEpoch = new_epoch; }
  void Upstream(const char *new_upstream) { free(mUpstream); mUpstream = strdup(new_upstream); }
  void Revision(const char *new_revision) { free(mRevision); mRevision = strdup(new_revision); }
  void ClearRevision() { Revision("0"); }

  DebVersion& operator=(const DebVersion& other);
  bool operator<(const DebVersion& other) const;
  bool operator==(const DebVersion& other) const;

protected:
  int mEpoch;
  char *mUpstream;
  char *mRevision;

  static int CompareComponent(const char *a, const char *b);
};

inline DebVersion::~DebVersion()
{
  free(mUpstream);
  free(mRevision);
}

inline bool DebVersion::operator==(const DebVersion& other) const
{
  return Epoch() == other.Epoch()
    && strcmp(Upstream(), other.Upstream()) == 0
    && strcmp(Revision(), other.Revision()) == 0;
}

inline DebVersion& DebVersion::operator=(const DebVersion& other)
{
  mEpoch = other.Epoch();
  mUpstream = strdup(other.Upstream());
  mRevision = strdup(other.Revision());
  return *this;
}

void operator<<(std::ostream& o, const DebVersion& ver);

#endif

DebVersion.cpp:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#include "DebVersion.h"

void operator<<(std::ostream& o, const DebVersion& ver)
{
  if (ver.Epoch() != 0) {
    o << ver.Epoch() << ":";
  }
  o << ver.Upstream();
  if (0 != strcmp(ver.Revision(), "0")) {
    o << "-" << ver.Revision();
  }
}

DebVersion::DebVersion(const char *version)
{
  // Extract epoch.  The Debian Policy Manual says that epoch must be
  // numeric, so atoi is safe.
  const char *epoch_end = strchr(version, ':');
  if (epoch_end != NULL) {
    mEpoch = atoi(version);
  } else {
    mEpoch = 0;
  }

  const char *upstream_start;
  if (epoch_end) {
    upstream_start = epoch_end + 1;
  } else {
    upstream_start = version;
  }

  const char *upstream_end = strrchr(upstream_start, '-');
  size_t upstream_size;
  if (upstream_end == NULL) {
    upstream_size = strlen(upstream_start);
  } else {
    upstream_size = upstream_end - upstream_start;
  }

  mUpstream = (char*) malloc(upstream_size + 1);
  strncpy(mUpstream, upstream_start, upstream_size);
  mUpstream[upstream_size] = '\0';

  if (upstream_end == NULL) {
    mRevision = strdup("0");
  } else {
    mRevision = strdup(upstream_end + 1);
  }
}

/**Compare two components of a Debian-style version.  Return -1, 0, or 1
 * if a is less than, equal to, or greater than b, respectively.
 */
int DebVersion::CompareComponent(const char *a, const char *b)
{
  while (*a && *b) {

    while (*a && *b && !isdigit(*a) && !isdigit(*b)) {
      if (*a != *b) {
        if (*a == '~') return -1;
        if (*b == '~') return 1;
        return *a < *b ? -1 : 1;
      }
      a++;
      b++;
    }
    if (*a && *b && (!isdigit(*a) || !isdigit(*b))) {
      if (*a == '~') return -1;
      if (*b == '~') return 1;
      return isdigit(*a) ? -1 : 1;
    }

    char *next_a, *next_b;
    long int num_a = strtol(a, &next_a, 10);
    long int num_b = strtol(b, &next_b, 10);
    if (num_a != num_b) {
      return num_a < num_b ? -1 : 1;
    }
    a = next_a;
    b = next_b;

  }
  if (!*a && !*b) {
    return 0;
  } else if (*a) {
    return *a == '~' ? -1 : 1;
  } else {
    return *b == '~' ? 1 : -1;
  }
}

bool DebVersion::operator<(const DebVersion& other) const
{
  if (Epoch() != other.Epoch()) {
    return Epoch() < other.Epoch();
  }

  int result = CompareComponent(Upstream(), other.Upstream());
  if (result) {
    return -1 == result;
  }

  return -1 == CompareComponent(Revision(), other.Revision());
}

And, because no code is complete without some good unit tests, here's DebVersionTest.cpp, using the excellent Google Test framework:

#include <gtest/gtest.h>

#include "DebVersion.h"

TEST(DebVersionTest, ParsesUpstreamOnly)
{
  DebVersion v("1");
  EXPECT_EQ(0, v.Epoch());
  EXPECT_STREQ("1", v.Upstream());
  EXPECT_STREQ("0", v.Revision());
}

TEST(DebVersionTest, ParsesEpochs)
{
  DebVersion v("1:2.0.1");
  EXPECT_EQ(1, v.Epoch());
  EXPECT_STREQ("2.0.1", v.Upstream());
  EXPECT_STREQ("0", v.Revision());
}

TEST(DebVersionTest, ParsesRevisions)
{
  DebVersion v("10:4.0.1~alpha-4-5");
  EXPECT_EQ(10, v.Epoch());
  EXPECT_STREQ("4.0.1~alpha-4", v.Upstream());
  EXPECT_STREQ("5", v.Revision());
}

TEST(DebVersionTest, ComparesNumbers)
{
  EXPECT_LT(DebVersion("1"), DebVersion("2"));
  EXPECT_EQ(DebVersion("10"), DebVersion("10"));
  EXPECT_LT(DebVersion("9"), DebVersion("10"));
  EXPECT_GT(DebVersion("10"), DebVersion("9"));
}

TEST(DebVersionTest, ComparesEpochs)
{
  EXPECT_GT(DebVersion("2:1"), DebVersion("1:2"));
  EXPECT_LT(DebVersion("10"), DebVersion("1:2"));
}

TEST(DebVersionTest, ComparesAlphas)
{
  EXPECT_LT(DebVersion("alpha"), DebVersion("beta"));
  EXPECT_LT(DebVersion("alpha1"), DebVersion("alpha2"));
  EXPECT_GT(DebVersion("alpha10"), DebVersion("alpha2"));
}

TEST(DebVersionTest, ComparesTildes)
{
  EXPECT_LT(DebVersion("3.0~beta1"), DebVersion("3.0"));
  EXPECT_GT(DebVersion("3.0~beta"), DebVersion("3.0~~prebeta"));
  EXPECT_LT(DebVersion("3.0~beta4"), DebVersion("3.0~rc1"));
}

TEST(DebVersionTest, ComparesRevisions)
{
  EXPECT_LT(DebVersion("3.0-2"), DebVersion("3.0-10"));
}

No comments: