现在的位置: 首页 > 综合 > 正文

Levenshtein Edit distance

2017年12月15日 ⁄ 综合 ⁄ 共 1520字 ⁄ 字号 评论关闭

似乎在LeetCode里面也有,不过这里是为了解决另外的问题而做的。假设两个string,有三种不同的操作:添加,删除,替换。如何通过最少步骤让一个字符串变换成为另一个字符串?这个最少步骤数即为Levenshtein edit distance。做法是用动态规划,见http://en.wikipedia.org/wiki/Edit_distance

#include <cassert>
#include <iostream>
#include <memory>
#include <string>

int edit_distance(const std::string& s1, const std::string& s2) {
    const int s1_length = s1.length();
    const int s2_length = s2.length();
    const int s1_elems = s1_length + 1;
    const int s2_elems = s2_length + 1;
    const int total_elems = s1_elems * s2_elems;
    std::unique_ptr<int[]> distance_matrix(new int[total_elems]);

    auto access = [&distance_matrix, s1_elems, s2_elems](int index_s1, int index_s2) -> int& {
        assert(index_s1 >= 0 && index_s1 < s1_elems);
        assert(index_s2 >= 0 && index_s2 < s2_elems);
        return distance_matrix[index_s2 * s1_elems + index_s1];
    };


    std::fill(distance_matrix.get(), distance_matrix.get() + total_elems, 0);

    for(auto i = 0; i < s1_elems; ++i)
        access(i, 0) = i;
    for(auto i = 0; i < s2_elems; ++i)
        access(0, i) = i;

    // Note: the iterator starts from 0, but when we map it to the matrix, the
    // index should be increased by 1
    for(auto s1_iter = 0; s1_iter < s1_length; ++s1_iter)
        for(auto s2_iter = 0; s2_iter < s2_length; ++s2_iter) {
            const char s1_ch = s1[s1_iter];
            const char s2_ch = s2[s2_iter];

            if (s1_ch == s2_ch) {
                access(s1_iter + 1, s2_iter + 1) = access(s1_iter, s2_iter);
            }
            else {
                access(s1_iter + 1, s2_iter + 1) = std::min(std::min(
                            access(s1_iter + 1, s2_iter),
                            access(s1_iter, s2_iter + 1)),
                        access(s1_iter, s2_iter)) + 1;
            }
        }

    return access(s1_length, s2_length);
}

int main(int argc, char* argv[]) {
    if (argc != 3) {
        std::cerr<<"Usage: edit_distance [string1] [string2]"<<std::endl;
        return -1;
    }
    std::cout<<edit_distance(argv[1], argv[2])<<std::endl;
    return 0;
}

抱歉!评论已关闭.