Mercurial > lbo > hg > juliaplay
changeset 20:9a9329349f98
DP: Levenshtein distance
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Thu, 02 Mar 2023 20:32:28 +0100 |
parents | 5625644a818d |
children | 4b24b380679d |
files | julia/dp.jl |
diffstat | 1 files changed, 46 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/julia/dp.jl Tue Feb 28 21:51:15 2023 +0100 +++ b/julia/dp.jl Thu Mar 02 20:32:28 2023 +0100 @@ -297,3 +297,49 @@ end end +function insert(s::AbstractString, i::Integer, c::Char)::String + string((@view s[begin:i-1]), c, (@view s[i:end])) +end + +function deleteat(s::AbstractString, i::Integer)::String + string((@view s[begin:i-1]), (@view s[i+1:end])) +end + +function replaceat(s::AbstractString, i::Integer, c::Char)::String + string((@view s[begin:i-1]), c, (@view s[i+1:end])) +end + +function levenshtein(a::AbstractString, b::AbstractString, i=(1, length(a)), j=(1, length(b)))::Int + il, ih = i + jl, jh = j + + # Case 1: Either string is empty. + if il > ih + return 0 + end + if jl > jh + return 0 + end + + # Case 2: End characters are identical. + if a[ih] == b[jh] + return levenshtein(a, b, (il, ih-1), (jl, jh-1)) + end + + # Case 3: End characters are different. + + # 3a. Insert b[end] into a + a_ = insert(a, ih+1, b[jh]) + case_a = 1 + levenshtein(a_, b, (il, ih), (jl, jh-1)) + + # 3b. Delete a[ih] + a__ = deleteat(a, ih) + case_b = 1 + levenshtein(a__, b, (il, ih-1), (jl, jh)) + + # 3c. Replace a[ih] with b[jh] + a___ = replaceat(a, ih, b[jh]) + case_c = 1 + levenshtein(a___, b, (il, ih-1), (jl, jh-1)) + + min(case_a, case_b, case_c) +end +