{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE MultiWayIf #-}
module Data.Text.Metrics
(
levenshtein
, levenshteinNorm
, damerauLevenshtein
, damerauLevenshteinNorm
, overlap
, jaccard
, hamming
, jaro
, jaroWinkler )
where
import Control.Monad
import Control.Monad.ST
import Data.Map.Strict (Map)
import Data.Ratio
import Data.Text
import GHC.Exts (inline)
import qualified Data.Map.Strict as M
import qualified Data.Text as T
import qualified Data.Text.Unsafe as TU
import qualified Data.Vector.Unboxed.Mutable as VUM
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative
#endif
levenshtein :: Text -> Text -> Int
levenshtein :: Text -> Text -> Int
levenshtein a :: Text
a b :: Text
b = (Int, Int) -> Int
forall a b. (a, b) -> a
fst (Text -> Text -> (Int, Int)
levenshtein_ Text
a Text
b)
levenshteinNorm :: Text -> Text -> Ratio Int
levenshteinNorm :: Text -> Text -> Ratio Int
levenshteinNorm = (Text -> Text -> (Int, Int)) -> Text -> Text -> Ratio Int
norm Text -> Text -> (Int, Int)
levenshtein_
levenshtein_ :: Text -> Text -> (Int, Int)
levenshtein_ :: Text -> Text -> (Int, Int)
levenshtein_ a :: Text
a b :: Text
b
| Text -> Bool
T.null Text
a = (Int
lenb, Int
lenm)
| Text -> Bool
T.null Text
b = (Int
lena, Int
lenm)
| Bool
otherwise = (forall s. ST s (Int, Int)) -> (Int, Int)
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Int, Int)) -> (Int, Int))
-> (forall s. ST s (Int, Int)) -> (Int, Int)
forall a b. (a -> b) -> a -> b
$ do
let v_len :: Int
v_len = Int
lenb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1
MVector s Int
v <- Int -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
VUM.unsafeNew (Int
v_len Int -> Int -> Int
forall a. Num a => a -> a -> a
* 2)
let gov :: Int -> ST s ()
gov !Int
i =
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
v_len) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
v Int
i Int
i
Int -> ST s ()
gov (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)
goi :: Int -> Int -> Int -> Int -> ST s ()
goi !Int
i !Int
na !Int
v0 !Int
v1 = do
let !(TU.Iter ai :: Char
ai da :: Int
da) = Text -> Int -> Iter
TU.iter Text
a Int
na
goj :: Int -> Int -> ST s ()
goj !Int
j !Int
nb =
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lenb) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
let !(TU.Iter bj :: Char
bj db :: Int
db) = Text -> Int -> Iter
TU.iter Text
b Int
nb
cost :: Int
cost = if Char
ai Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
bj then 0 else 1
Int
x <- (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (Int -> Int) -> ST s Int -> ST s Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j)
Int
y <- (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (Int -> Int) -> ST s Int -> ST s Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)
Int
z <- (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
cost) (Int -> Int) -> ST s Int -> ST s Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j)
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
x (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
y Int
z))
Int -> Int -> ST s ()
goj (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (Int
nb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
db)
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lena) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
v Int
v1 (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)
Int -> Int -> ST s ()
goj 0 0
Int -> Int -> Int -> Int -> ST s ()
goi (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (Int
na Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
da) Int
v1 Int
v0
Int -> ST s ()
gov 0
Int -> Int -> Int -> Int -> ST s ()
goi 0 0 0 Int
v_len
Int
ld <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
lenb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ if Int -> Bool
forall a. Integral a => a -> Bool
even Int
lena then 0 else Int
v_len)
(Int, Int) -> ST s (Int, Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
ld, Int
lenm)
where
lena :: Int
lena = Text -> Int
T.length Text
a
lenb :: Int
lenb = Text -> Int
T.length Text
b
lenm :: Int
lenm = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
lena Int
lenb
{-# INLINE levenshtein_ #-}
damerauLevenshtein :: Text -> Text -> Int
damerauLevenshtein :: Text -> Text -> Int
damerauLevenshtein a :: Text
a b :: Text
b = (Int, Int) -> Int
forall a b. (a, b) -> a
fst (Text -> Text -> (Int, Int)
damerauLevenshtein_ Text
a Text
b)
damerauLevenshteinNorm :: Text -> Text -> Ratio Int
damerauLevenshteinNorm :: Text -> Text -> Ratio Int
damerauLevenshteinNorm = (Text -> Text -> (Int, Int)) -> Text -> Text -> Ratio Int
norm Text -> Text -> (Int, Int)
damerauLevenshtein_
damerauLevenshtein_ :: Text -> Text -> (Int, Int)
damerauLevenshtein_ :: Text -> Text -> (Int, Int)
damerauLevenshtein_ a :: Text
a b :: Text
b
| Text -> Bool
T.null Text
a = (Int
lenb, Int
lenm)
| Text -> Bool
T.null Text
b = (Int
lena, Int
lenm)
| Bool
otherwise = (forall s. ST s (Int, Int)) -> (Int, Int)
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Int, Int)) -> (Int, Int))
-> (forall s. ST s (Int, Int)) -> (Int, Int)
forall a b. (a -> b) -> a -> b
$ do
let v_len :: Int
v_len = Int
lenb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1
MVector s Int
v <- Int -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
VUM.unsafeNew (Int
v_len Int -> Int -> Int
forall a. Num a => a -> a -> a
* 3)
let gov :: Int -> ST s ()
gov !Int
i =
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
v_len) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
v Int
i Int
i
Int -> ST s ()
gov (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)
goi :: Int -> Int -> Char -> Int -> Int -> Int -> ST s ()
goi !Int
i !Int
na !Char
ai_1 !Int
v0 !Int
v1 !Int
v2 = do
let !(TU.Iter ai :: Char
ai da :: Int
da) = Text -> Int -> Iter
TU.iter Text
a Int
na
goj :: Int -> Int -> Char -> ST s ()
goj !Int
j !Int
nb !Char
bj_1 =
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lenb) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
let !(TU.Iter bj :: Char
bj db :: Int
db) = Text -> Int -> Iter
TU.iter Text
b Int
nb
cost :: Int
cost = if Char
ai Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
bj then 0 else 1
Int
x <- (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (Int -> Int) -> ST s Int -> ST s Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j)
Int
y <- (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (Int -> Int) -> ST s Int -> ST s Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)
Int
z <- (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
cost) (Int -> Int) -> ST s Int -> ST s Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j)
let g :: Int
g = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
x (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
y Int
z)
Int
val <- (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
cost) (Int -> Int) -> ST s Int -> ST s Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v2 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1)
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
v (Int
v1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (Int -> ST s ()) -> Int -> ST s ()
forall a b. (a -> b) -> a -> b
$
if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0 Bool -> Bool -> Bool
&& Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0 Bool -> Bool -> Bool
&& Char
ai Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
bj_1 Bool -> Bool -> Bool
&& Char
ai_1 Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
bj Bool -> Bool -> Bool
&& Int
val Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
g
then Int
val
else Int
g
Int -> Int -> Char -> ST s ()
goj (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (Int
nb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
db) Char
bj
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lena) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
v Int
v1 (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)
Int -> Int -> Char -> ST s ()
goj 0 0 'a'
Int -> Int -> Char -> Int -> Int -> Int -> ST s ()
goi (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (Int
na Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
da) Char
ai Int
v1 Int
v2 Int
v0
Int -> ST s ()
gov 0
Int -> Int -> Char -> Int -> Int -> Int -> ST s ()
goi 0 0 'a' 0 Int
v_len (Int
v_len Int -> Int -> Int
forall a. Num a => a -> a -> a
* 2)
Int
ld <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v (Int
lenb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
lena Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` 3) Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
v_len)
(Int, Int) -> ST s (Int, Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
ld, Int
lenm)
where
lena :: Int
lena = Text -> Int
T.length Text
a
lenb :: Int
lenb = Text -> Int
T.length Text
b
lenm :: Int
lenm = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
lena Int
lenb
{-# INLINE damerauLevenshtein_ #-}
overlap :: Text -> Text -> Ratio Int
overlap :: Text -> Text -> Ratio Int
overlap a :: Text
a b :: Text
b =
if Int
d Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 0
then 1 Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% 1
else Map Char Int -> Map Char Int -> Int
intersectionSize (Text -> Map Char Int
mkTextMap Text
a) (Text -> Map Char Int
mkTextMap Text
b) Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
d
where
d :: Int
d = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Text -> Int
T.length Text
a) (Text -> Int
T.length Text
b)
jaccard :: Text -> Text -> Ratio Int
jaccard :: Text -> Text -> Ratio Int
jaccard a :: Text
a b :: Text
b =
if Int
d Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 0
then 1 Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% 1
else Map Char Int -> Map Char Int -> Int
intersectionSize Map Char Int
ma Map Char Int
mb Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
d
where
ma :: Map Char Int
ma = Text -> Map Char Int
mkTextMap Text
a
mb :: Map Char Int
mb = Text -> Map Char Int
mkTextMap Text
b
d :: Int
d = Map Char Int -> Map Char Int -> Int
unionSize Map Char Int
ma Map Char Int
mb
mkTextMap :: Text -> Map Char Int
mkTextMap :: Text -> Map Char Int
mkTextMap = (Map Char Int -> Char -> Map Char Int)
-> Map Char Int -> Text -> Map Char Int
forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl' Map Char Int -> Char -> Map Char Int
forall k a. (Ord k, Num a) => Map k a -> k -> Map k a
f Map Char Int
forall k a. Map k a
M.empty
where
f :: Map k a -> k -> Map k a
f m :: Map k a
m ch :: k
ch = (a -> a -> a) -> k -> a -> Map k a -> Map k a
forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
M.insertWith a -> a -> a
forall a. Num a => a -> a -> a
(+) k
ch 1 Map k a
m
{-# INLINE mkTextMap #-}
intersectionSize :: Map Char Int -> Map Char Int -> Int
intersectionSize :: Map Char Int -> Map Char Int -> Int
intersectionSize a :: Map Char Int
a b :: Map Char Int
b = (Int -> Int -> Int) -> Int -> Map Char Int -> Int
forall a b k. (a -> b -> a) -> a -> Map k b -> a
M.foldl' Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) 0 ((Int -> Int -> Int) -> Map Char Int -> Map Char Int -> Map Char Int
forall k a b c.
Ord k =>
(a -> b -> c) -> Map k a -> Map k b -> Map k c
M.intersectionWith Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Map Char Int
a Map Char Int
b)
{-# INLINE intersectionSize #-}
unionSize :: Map Char Int -> Map Char Int -> Int
unionSize :: Map Char Int -> Map Char Int -> Int
unionSize a :: Map Char Int
a b :: Map Char Int
b = (Int -> Int -> Int) -> Int -> Map Char Int -> Int
forall a b k. (a -> b -> a) -> a -> Map k b -> a
M.foldl' Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) 0 ((Int -> Int -> Int) -> Map Char Int -> Map Char Int -> Map Char Int
forall k a. Ord k => (a -> a -> a) -> Map k a -> Map k a -> Map k a
M.unionWith Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Map Char Int
a Map Char Int
b)
{-# INLINE unionSize #-}
hamming :: Text -> Text -> Maybe Int
hamming :: Text -> Text -> Maybe Int
hamming a :: Text
a b :: Text
b =
if Text -> Int
T.length Text
a Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Int
T.length Text
b
then Int -> Maybe Int
forall a. a -> Maybe a
Just (Int -> Int -> Int -> Int
forall t. Num t => Int -> Int -> t -> t
go 0 0 0)
else Maybe Int
forall a. Maybe a
Nothing
where
go :: Int -> Int -> t -> t
go !Int
na !Int
nb !t
r =
let !(TU.Iter cha :: Char
cha da :: Int
da) = Text -> Int -> Iter
TU.iter Text
a Int
na
!(TU.Iter chb :: Char
chb db :: Int
db) = Text -> Int -> Iter
TU.iter Text
b Int
nb
in if | Int
na Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
len -> t
r
| Char
cha Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
chb -> Int -> Int -> t -> t
go (Int
na Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
da) (Int
nb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
db) (t
r t -> t -> t
forall a. Num a => a -> a -> a
+ 1)
| Bool
otherwise -> Int -> Int -> t -> t
go (Int
na Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
da) (Int
nb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
db) t
r
len :: Int
len = Text -> Int
TU.lengthWord16 Text
a
jaro :: Text -> Text -> Ratio Int
jaro :: Text -> Text -> Ratio Int
jaro a :: Text
a b :: Text
b =
if Text -> Bool
T.null Text
a Bool -> Bool -> Bool
|| Text -> Bool
T.null Text
b
then 0 Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% 1
else (forall s. ST s (Ratio Int)) -> Ratio Int
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Ratio Int)) -> Ratio Int)
-> (forall s. ST s (Ratio Int)) -> Ratio Int
forall a b. (a -> b) -> a -> b
$ do
let lena :: Int
lena = Text -> Int
T.length Text
a
lenb :: Int
lenb = Text -> Int
T.length Text
b
d :: Int
d =
if Int
lena Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 2 Bool -> Bool -> Bool
&& Int
lenb Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 2
then Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
lena Int
lenb Int -> Int -> Int
forall a. Integral a => a -> a -> a
`quot` 2 Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1
else 0
MVector s Int
v <- Int -> Int -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> a -> m (MVector (PrimState m) a)
VUM.replicate Int
lenb (0 :: Int)
MVector s Int
r <- Int -> Int -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> a -> m (MVector (PrimState m) a)
VUM.replicate 3 (0 :: Int)
let goi :: Int -> Int -> Int -> ST s ()
goi !Int
i !Int
na !Int
fromb = do
let !(TU.Iter ai :: Char
ai da :: Int
da) = Text -> Int -> Iter
TU.iter Text
a Int
na
(from :: Int
from, fromb' :: Int
fromb') =
if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
d
then (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
d, Int
fromb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Text -> Int -> Int
TU.iter_ Text
b Int
fromb)
else (0, 0)
to :: Int
to = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
d Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) Int
lenb
goj :: Int -> Int -> ST s ()
goj !Int
j !Int
nb =
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
to) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
let !(TU.Iter bj :: Char
bj db :: Int
db) = Text -> Int -> Iter
TU.iter Text
b Int
nb
Bool
used <- (Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 1) (Int -> Bool) -> ST s Int -> ST s Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
v Int
j
if Bool -> Bool
not Bool
used Bool -> Bool -> Bool
&& Char
ai Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
bj
then do
Int
tj <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
r 0
if Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
tj
then MVector (PrimState (ST s)) Int -> (Int -> Int) -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> (a -> a) -> Int -> m ()
VUM.unsafeModify MVector s Int
MVector (PrimState (ST s)) Int
r (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) 2
else MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
r 0 Int
j
MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
v Int
j 1
MVector (PrimState (ST s)) Int -> (Int -> Int) -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> (a -> a) -> Int -> m ()
VUM.unsafeModify MVector s Int
MVector (PrimState (ST s)) Int
r (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) 1
else Int -> Int -> ST s ()
goj (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (Int
nb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
db)
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lena) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
Int -> Int -> ST s ()
goj Int
from Int
fromb
Int -> Int -> Int -> ST s ()
goi (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (Int
na Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
da) Int
fromb'
Int -> Int -> Int -> ST s ()
goi 0 0 0
Int
m <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
r 1
Int
t <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
r 2
Ratio Int -> ST s (Ratio Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Ratio Int -> ST s (Ratio Int)) -> Ratio Int -> ST s (Ratio Int)
forall a b. (a -> b) -> a -> b
$
if Int
m Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 0
then 0 Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% 1
else ((Int
m Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
lena) Ratio Int -> Ratio Int -> Ratio Int
forall a. Num a => a -> a -> a
+
(Int
m Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
lenb) Ratio Int -> Ratio Int -> Ratio Int
forall a. Num a => a -> a -> a
+
((Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
t) Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
m)) Ratio Int -> Ratio Int -> Ratio Int
forall a. Fractional a => a -> a -> a
/ 3
jaroWinkler :: Text -> Text -> Ratio Int
jaroWinkler :: Text -> Text -> Ratio Int
jaroWinkler a :: Text
a b :: Text
b = Ratio Int
dj Ratio Int -> Ratio Int -> Ratio Int
forall a. Num a => a -> a -> a
+ (1 Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% 10) Ratio Int -> Ratio Int -> Ratio Int
forall a. Num a => a -> a -> a
* Ratio Int
l Ratio Int -> Ratio Int -> Ratio Int
forall a. Num a => a -> a -> a
* (1 Ratio Int -> Ratio Int -> Ratio Int
forall a. Num a => a -> a -> a
- Ratio Int
dj)
where
dj :: Ratio Int
dj = Ratio Int -> Ratio Int
forall a. a -> a
inline (Text -> Text -> Ratio Int
jaro Text
a Text
b)
l :: Ratio Int
l = Int -> Ratio Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Text -> Text -> Int
commonPrefix Text
a Text
b)
commonPrefix :: Text -> Text -> Int
commonPrefix :: Text -> Text -> Int
commonPrefix a :: Text
a b :: Text
b = Int -> Int -> Int -> Int
forall t. Num t => Int -> Int -> t -> t
go 0 0 0
where
go :: Int -> Int -> t -> t
go !Int
na !Int
nb !t
r =
let !(TU.Iter cha :: Char
cha da :: Int
da) = Text -> Int -> Iter
TU.iter Text
a Int
na
!(TU.Iter chb :: Char
chb db :: Int
db) = Text -> Int -> Iter
TU.iter Text
b Int
nb
in if | Int
na Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
lena -> t
r
| Int
nb Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
lenb -> t
r
| Char
cha Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
chb -> Int -> Int -> t -> t
go (Int
na Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
da) (Int
nb Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
db) (t
r t -> t -> t
forall a. Num a => a -> a -> a
+ 1)
| Bool
otherwise -> t
r
lena :: Int
lena = Text -> Int
TU.lengthWord16 Text
a
lenb :: Int
lenb = Text -> Int
TU.lengthWord16 Text
b
{-# INLINE commonPrefix #-}
norm :: (Text -> Text -> (Int, Int)) -> Text -> Text -> Ratio Int
norm :: (Text -> Text -> (Int, Int)) -> Text -> Text -> Ratio Int
norm f :: Text -> Text -> (Int, Int)
f a :: Text
a b :: Text
b =
let (r :: Int
r, l :: Int
l) = Text -> Text -> (Int, Int)
f Text
a Text
b
in if Int
r Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 0
then 1 Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% 1
else 1 Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% 1 Ratio Int -> Ratio Int -> Ratio Int
forall a. Num a => a -> a -> a
- Int
r Int -> Int -> Ratio Int
forall a. Integral a => a -> a -> Ratio a
% Int
l
{-# INLINE norm #-}