changeset 20:9a9329349f98

DP: Levenshtein distance
author Lewin Bormann <lbo@spheniscida.de>
date Thu, 02 Mar 2023 20:32:28 +0100
parents 5625644a818d
children 4b24b380679d
files julia/dp.jl
diffstat 1 files changed, 46 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/julia/dp.jl	Tue Feb 28 21:51:15 2023 +0100
+++ b/julia/dp.jl	Thu Mar 02 20:32:28 2023 +0100
@@ -297,3 +297,49 @@
     end
 end
 
+function insert(s::AbstractString, i::Integer, c::Char)::String
+    string((@view s[begin:i-1]), c, (@view s[i:end]))
+end
+
+function deleteat(s::AbstractString, i::Integer)::String
+    string((@view s[begin:i-1]), (@view s[i+1:end]))
+end
+
+function replaceat(s::AbstractString, i::Integer, c::Char)::String
+    string((@view s[begin:i-1]), c, (@view s[i+1:end]))
+end
+
+function levenshtein(a::AbstractString, b::AbstractString, i=(1, length(a)), j=(1, length(b)))::Int
+    il, ih = i
+    jl, jh = j
+
+    # Case 1: Either string is empty.
+    if il > ih
+        return 0
+    end
+    if jl > jh
+        return 0
+    end
+
+    # Case 2: End characters are identical.
+    if a[ih] == b[jh]
+        return levenshtein(a, b, (il, ih-1), (jl, jh-1))
+    end
+
+    # Case 3: End characters are different.
+
+    # 3a. Insert b[end] into a
+    a_ = insert(a, ih+1, b[jh])
+    case_a = 1 + levenshtein(a_, b, (il, ih), (jl, jh-1))
+
+    # 3b. Delete a[ih]
+    a__ = deleteat(a, ih)
+    case_b = 1 + levenshtein(a__, b, (il, ih-1), (jl, jh))
+
+    # 3c. Replace a[ih] with b[jh]
+    a___ = replaceat(a, ih, b[jh])
+    case_c = 1 + levenshtein(a___, b, (il, ih-1), (jl, jh-1))
+
+    min(case_a, case_b, case_c)
+end
+