LeetCode 72. Edit Distance (LaTeX)

Given two words word1 and word2, find the minimum number of operations required to convert word1 to word2.

You have the following 3 operations permitted on a word:

  1. Insert a character
  2. Delete a character
  3. Replace a character

Example

Example 1:

Input: word1 = "horse", word2 = "ros"
Output: 3
Explanation: 
horse -> rorse (replace 'h' with 'r')
rorse -> rose (remove 'r')
rose -> ros (remove 'e')

Example 2:

Input: word1 = "intention", word2 = "execution"
Output: 5
Explanation: 
intention -> inention (remove 't')
inention -> enention (replace 'i' with 'e')
enention -> exention (replace 'n' with 'x')
exention -> exection (replace 'n' with 'c')
exection -> execution (insert 'u')

Solution

This is a very classic DP problem. The major challenge is to implement 2d arrays in LaTeX. Here, I used the l3intarray package with an index converter.

\documentclass{article}
\usepackage[a4paper]{geometry}
\usepackage[T1]{fontenc}
\usepackage{mathptmx}
\usepackage{amsmath, amssymb}
\usepackage{tasks}
\usepackage{datetime2}
\usepackage{expl3}

\begin{document}


\ExplSyntaxOn

% reference: https://leetcode.com/problems/edit-distance/discuss/712872/Python-DP-%2B-Explanation-of-tree-structure

\int_new:N \l_dist_int
\int_new:N \l_tmpc_int
\int_new:N \l_tmpd_int
\int_new:N \l_tmpe_int
\str_new:N \l_tmpc_str
\str_new:N \l_tmpd_str


\cs_new:Npn \get_index:nn #1#2 {
    \int_eval:n {(#1) * \l_tmpb_int + (#2) + 1}
}

\cs_new:Npn \get_index: {
    \get_index:nn {\l_tmpc_int} {\l_tmpd_int}
}

% find the min value of elements in a seq
\cs_new:Npn \seq_int_min:n #1 {
    \int_compare:nNnT {#1} < {\l_tmpe_int} {
        \int_set:Nn \l_tmpe_int {#1}
    }
}

\cs_new:Npn \edit_distance:nn #1#2 {
    \str_set:Nn \l_tmpa_str {#1}
    \int_set:Nn \l_tmpa_int {\str_count:N \l_tmpa_str}
    \str_set:Nn \l_tmpb_str {#2}
    \int_set:Nn \l_tmpb_int {\str_count:N \l_tmpb_str}
    
    %\int_show:N \l_tmpa_int
    %\int_show:N \l_tmpb_int

    \bool_if:nTF {
        \int_compare_p:nNn {\l_tmpa_int} = {0} &&
        \int_compare_p:nNn {\l_tmpb_int} = {0}
    } {
        % if both strings are empty
        \int_set:Nn \l_dist_int {0}
    }
    {
        \bool_if:nTF {
            \int_compare_p:nNn {\l_tmpa_int} = {0} ||
            \int_compare_p:nNn {\l_tmpb_int} = {0}
        } {
            % if one of the strings is empty
            \int_set:Nn \l_dist_int {1}
        } {
            %\par two~non-empty~strings
            % otherwise, create DP array
            \cs_if_exist:NT \g_tmpa_intarr {
                \cs_gset_eq:NN \g_tmpa_intarr \undefined
            }
            
            \int_incr:N \l_tmpa_int
            \int_incr:N \l_tmpb_int
            \intarray_new:Nn \g_tmpa_intarr {\l_tmpa_int * \l_tmpb_int}
            \intarray_gzero:N \g_tmpa_intarr
            
            \int_set:Nn \l_tmpc_int {0}

            \int_do_until:nNnn {\l_tmpc_int} = {\l_tmpa_int} {
                \int_set:Nn \l_tmpd_int {0}
                \int_do_until:nNnn {\l_tmpd_int} = {\l_tmpb_int} {
                    
                    %\int_show:N \l_tmpc_int
                    %\int_show:N \l_tmpd_int
                    \int_compare:nNnTF {\l_tmpc_int} = {0} {
                        \intarray_gset:Nnn \g_tmpa_intarr {\get_index:} {\l_tmpd_int}
                    } {
                        \int_compare:nNnTF {\l_tmpd_int} = {0} {
                            \intarray_gset:Nnn \g_tmpa_intarr {\get_index:} {\l_tmpc_int}
                        }
                        {
                            \exp_args:NNx \str_set:Nn \l_tmpc_str {\str_item:Nn \l_tmpa_str {\l_tmpc_int - 1}}
                            \exp_args:NNx \str_set:Nn \l_tmpd_str {\str_item:Nn \l_tmpb_str {\l_tmpd_int - 1}}
                            \str_if_eq:NNTF \l_tmpc_str \l_tmpd_str {
                                \int_set:Nn \l_tmpe_int {\intarray_item:Nn \g_tmpa_intarr {\get_index:nn {\l_tmpc_int - 1} {\l_tmpd_int - 1}}}
                                \intarray_gset:Nnn \g_tmpa_intarr {\get_index:} {\l_tmpe_int}
                            } {
                                \seq_clear:N \l_tmpa_seq
                                \exp_args:NNx \seq_push:Nn \l_tmpa_seq {\intarray_item:Nn \g_tmpa_intarr {\get_index:nn {\l_tmpc_int} {\l_tmpd_int - 1}}}
                                \exp_args:NNx \seq_push:Nn \l_tmpa_seq {\intarray_item:Nn \g_tmpa_intarr {\get_index:nn {\l_tmpc_int - 1} {\l_tmpd_int}}}
                                \exp_args:NNx \seq_push:Nn \l_tmpa_seq {\intarray_item:Nn \g_tmpa_intarr {\get_index:nn {\l_tmpc_int - 1} {\l_tmpd_int - 1}}}
                                \seq_show:N \l_tmpa_seq
                                \seq_get_left:NN \l_tmpa_seq \l_tmpa_tl
                                \exp_args:NNo \int_set:Nn \l_tmpe_int {\l_tmpa_tl}
                                \seq_map_function:NN \l_tmpa_seq \seq_int_min:n
                                \int_incr:N \l_tmpe_int
                                \intarray_gset:Nnn \g_tmpa_intarr {\get_index:} {\l_tmpe_int}
                            }
                        }
                    }
                    
                    \int_incr:N \l_tmpd_int
                }
                \int_incr:N \l_tmpc_int
            }
        }
    }
    \intarray_show:N \g_tmpa_intarr
    \int_set:Nn \l_dist_int {\intarray_item:Nn \g_tmpa_intarr {\get_index:nn {\l_tmpa_int - 1} {\l_tmpb_int - 1}}}
}

\newcommand{\editdist}[2]{
    \edit_distance:nn {#1}{#2}
    \int_use:N \l_dist_int
}

\ExplSyntaxOff


\par\editdist{horse}{ros}
\par\editdist{intention}{execution}

\par\DTMNow

\end{document} 

Output

3
5
2020-07-03 18:58:38-04:00