{-# LANGUAGE BangPatterns, FlexibleContexts #-}

module Data.Clustering.Hierarchical.Internal.DistanceMatrix
    (singleLinkage
    ,completeLinkage
    ,upgma
    ,fakeAverageLinkage
    ) where

-- from base
import Control.Monad (forM_)
import Control.Monad.ST (ST, runST)
import Data.Array (listArray, (!))
import Data.Array.ST (STArray, STUArray, newArray_, newListArray, readArray, writeArray)
import Data.Function (on)
import Data.List (delete, tails, (\\))
import Data.STRef (STRef, newSTRef, readSTRef, writeSTRef)

-- from containers
import qualified Data.IntMap as IM

-- from this package
import Data.Clustering.Hierarchical.Internal.Types

mkErr :: String -> a
mkErr :: forall a. String -> a
mkErr = String -> a
forall a. HasCallStack => String -> a
error (String -> a) -> (String -> String) -> String -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
"Data.Clustering.Hierarchical.Internal.DistanceMatrix." String -> String -> String
forall a. [a] -> [a] -> [a]
++)

-- | Internal (to this package) type used to represent a cluster
-- (of possibly just one element).  The @key@ should be less than
-- or equal to all elements of the cluster.
data Cluster = Cluster { Cluster -> Int
key  :: {-# UNPACK #-} !Item  -- ^ Element used as key.
                       , Cluster -> Int
size :: {-# UNPACK #-} !Int   -- ^ At least one, the @key@.
                       }
               deriving (Cluster -> Cluster -> Bool
(Cluster -> Cluster -> Bool)
-> (Cluster -> Cluster -> Bool) -> Eq Cluster
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Cluster -> Cluster -> Bool
$c/= :: Cluster -> Cluster -> Bool
== :: Cluster -> Cluster -> Bool
$c== :: Cluster -> Cluster -> Bool
Eq, Eq Cluster
Eq Cluster
-> (Cluster -> Cluster -> Ordering)
-> (Cluster -> Cluster -> Bool)
-> (Cluster -> Cluster -> Bool)
-> (Cluster -> Cluster -> Bool)
-> (Cluster -> Cluster -> Bool)
-> (Cluster -> Cluster -> Cluster)
-> (Cluster -> Cluster -> Cluster)
-> Ord Cluster
Cluster -> Cluster -> Bool
Cluster -> Cluster -> Ordering
Cluster -> Cluster -> Cluster
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Cluster -> Cluster -> Cluster
$cmin :: Cluster -> Cluster -> Cluster
max :: Cluster -> Cluster -> Cluster
$cmax :: Cluster -> Cluster -> Cluster
>= :: Cluster -> Cluster -> Bool
$c>= :: Cluster -> Cluster -> Bool
> :: Cluster -> Cluster -> Bool
$c> :: Cluster -> Cluster -> Bool
<= :: Cluster -> Cluster -> Bool
$c<= :: Cluster -> Cluster -> Bool
< :: Cluster -> Cluster -> Bool
$c< :: Cluster -> Cluster -> Bool
compare :: Cluster -> Cluster -> Ordering
$ccompare :: Cluster -> Cluster -> Ordering
Ord, Int -> Cluster -> String -> String
[Cluster] -> String -> String
Cluster -> String
(Int -> Cluster -> String -> String)
-> (Cluster -> String)
-> ([Cluster] -> String -> String)
-> Show Cluster
forall a.
(Int -> a -> String -> String)
-> (a -> String) -> ([a] -> String -> String) -> Show a
showList :: [Cluster] -> String -> String
$cshowList :: [Cluster] -> String -> String
show :: Cluster -> String
$cshow :: Cluster -> String
showsPrec :: Int -> Cluster -> String -> String
$cshowsPrec :: Int -> Cluster -> String -> String
Show)

-- | An element of a cluster.
type Item = IM.Key

-- | Creates a singleton cluster.
singleton :: Item -> Cluster
singleton :: Int -> Cluster
singleton Int
k = Cluster :: Int -> Int -> Cluster
Cluster {key :: Int
key = Int
k, size :: Int
size = Int
1}

-- | /O(1)/. Joins two clusters, returns the 'key' that didn't
-- become 'key' of the new cluster as well.  Clusters are not
-- monoid because we don't have 'mempty'.
merge :: Cluster -> Cluster -> (Cluster, Item)
merge :: Cluster -> Cluster -> (Cluster, Int)
merge Cluster
c1 Cluster
c2 = let (Int
kl,Int
km) = if Cluster -> Int
key Cluster
c1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Cluster -> Int
key Cluster
c2
                            then (Cluster -> Int
key Cluster
c1, Cluster -> Int
key Cluster
c2)
                            else (Cluster -> Int
key Cluster
c2, Cluster -> Int
key Cluster
c1)
              in (Cluster :: Int -> Int -> Cluster
Cluster {key :: Int
key  = Int
kl
                          ,size :: Int
size = Cluster -> Int
size Cluster
c1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Cluster -> Int
size Cluster
c2}
                 ,Int
km)




-- | A distance matrix.
data DistMatrix s =
    DM { forall s. DistMatrix s -> STUArray s (Int, Int) Distance
matrix   :: {-# UNPACK #-} !(STUArray s (Item, Item) Distance)
       , forall s. DistMatrix s -> STRef s [Int]
active   :: {-# UNPACK #-} !(STRef    s [Item])
       , forall s. DistMatrix s -> STArray s Int Cluster
clusters :: {-# UNPACK #-} !(STArray  s Item Cluster)
       }


-- | /O(n^2)/. Creates a list of possible combinations between
-- the given elements.
combinations :: [a] -> [(a,a)]
combinations :: forall a. [a] -> [(a, a)]
combinations [a]
xs = [(a
a,a
b) | (a
a:[a]
as) <- [a] -> [[a]]
forall a. [a] -> [[a]]
tails [a]
xs, a
b <- [a]
as]


-- | /O(n^2)/. Constructs a new distance matrix from a distance
-- function and a number @n@ of elements.  Elements will be drawn
-- from @[1..n]@
fromDistance :: (Item -> Item -> Distance) -> Item -> ST s (DistMatrix s)
fromDistance :: forall s. (Int -> Int -> Distance) -> Int -> ST s (DistMatrix s)
fromDistance Int -> Int -> Distance
_ Int
n | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2 = String -> ST s (DistMatrix s)
forall a. String -> a
mkErr String
"fromDistance: n < 2 is meaningless"
fromDistance Int -> Int -> Distance
dist Int
n = do
  STUArray s (Int, Int) Distance
matrix_ <- ((Int, Int), (Int, Int)) -> ST s (STUArray s (Int, Int) Distance)
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
(i, i) -> m (a i e)
newArray_ ((Int
1,Int
2), (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1,Int
n))
  STRef s [Int]
active_ <- [Int] -> ST s (STRef s [Int])
forall a s. a -> ST s (STRef s a)
newSTRef [Int
1..Int
n]
  [(Int, Int)] -> ((Int, Int) -> ST s ()) -> ST s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([Int] -> [(Int, Int)]
forall a. [a] -> [(a, a)]
combinations [Int
1..Int
n]) (((Int, Int) -> ST s ()) -> ST s ())
-> ((Int, Int) -> ST s ()) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \(Int, Int)
x -> STUArray s (Int, Int) Distance -> (Int, Int) -> Distance -> ST s ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> e -> m ()
writeArray STUArray s (Int, Int) Distance
matrix_ (Int, Int)
x ((Int -> Int -> Distance) -> (Int, Int) -> Distance
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Int -> Int -> Distance
dist (Int, Int)
x)
  STArray s Int Cluster
clusters_ <- (Int, Int) -> [Cluster] -> ST s (STArray s Int Cluster)
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
(i, i) -> [e] -> m (a i e)
newListArray (Int
1,Int
n) ((Int -> Cluster) -> [Int] -> [Cluster]
forall a b. (a -> b) -> [a] -> [b]
map Int -> Cluster
singleton [Int
1..Int
n])
  DistMatrix s -> ST s (DistMatrix s)
forall (m :: * -> *) a. Monad m => a -> m a
return (DistMatrix s -> ST s (DistMatrix s))
-> DistMatrix s -> ST s (DistMatrix s)
forall a b. (a -> b) -> a -> b
$ DM :: forall s.
STUArray s (Int, Int) Distance
-> STRef s [Int] -> STArray s Int Cluster -> DistMatrix s
DM {matrix :: STUArray s (Int, Int) Distance
matrix   = STUArray s (Int, Int) Distance
matrix_
              ,active :: STRef s [Int]
active   = STRef s [Int]
active_
              ,clusters :: STArray s Int Cluster
clusters = STArray s Int Cluster
clusters_}


-- | /O(n^2)/. Returns the minimum distance of the distance
-- matrix.  The first key given is less than the second key.
findMin :: DistMatrix s -> ST s ((Cluster, Cluster), Distance)
findMin :: forall s. DistMatrix s -> ST s ((Cluster, Cluster), Distance)
findMin DistMatrix s
dm = STRef s [Int] -> ST s [Int]
forall s a. STRef s a -> ST s a
readSTRef (DistMatrix s -> STRef s [Int]
forall s. DistMatrix s -> STRef s [Int]
active DistMatrix s
dm) ST s [Int]
-> ([Int] -> ST s ((Cluster, Cluster), Distance))
-> ST s ((Cluster, Cluster), Distance)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [Int] -> ST s ((Cluster, Cluster), Distance)
forall {m :: * -> *}.
(MArray (STUArray s) Distance m, MArray (STArray s) Cluster m) =>
[Int] -> m ((Cluster, Cluster), Distance)
go1
    where
      matrix_ :: STUArray s (Int, Int) Distance
matrix_ = DistMatrix s -> STUArray s (Int, Int) Distance
forall s. DistMatrix s -> STUArray s (Int, Int) Distance
matrix DistMatrix s
dm
      choose :: (a, b) -> a -> b -> (a, b)
choose (a, b)
b a
i b
m' = if b
m' b -> b -> Bool
forall a. Ord a => a -> a -> Bool
< (a, b) -> b
forall a b. (a, b) -> b
snd (a, b)
b then (a
i, b
m') else (a, b)
b

      go1 :: [Int] -> m ((Cluster, Cluster), Distance)
go1 is :: [Int]
is@(Int
i1:Int
i2:[Int]
_) = do Distance
di <- STUArray s (Int, Int) Distance -> (Int, Int) -> m Distance
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> m e
readArray STUArray s (Int, Int) Distance
matrix_ (Int
i1, Int
i2) -- initial
                            ((Int
b1, Int
b2), Distance
d) <- [Int] -> ((Int, Int), Distance) -> m ((Int, Int), Distance)
forall {m :: * -> *}.
MArray (STUArray s) Distance m =>
[Int] -> ((Int, Int), Distance) -> m ((Int, Int), Distance)
go2 [Int]
is ((Int
i1, Int
i2), Distance
di)
                            Cluster
c1 <- STArray s Int Cluster -> Int -> m Cluster
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> m e
readArray (DistMatrix s -> STArray s Int Cluster
forall s. DistMatrix s -> STArray s Int Cluster
clusters DistMatrix s
dm) Int
b1
                            Cluster
c2 <- STArray s Int Cluster -> Int -> m Cluster
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> m e
readArray (DistMatrix s -> STArray s Int Cluster
forall s. DistMatrix s -> STArray s Int Cluster
clusters DistMatrix s
dm) Int
b2
                            ((Cluster, Cluster), Distance) -> m ((Cluster, Cluster), Distance)
forall (m :: * -> *) a. Monad m => a -> m a
return ((Cluster
c1, Cluster
c2), Distance
d)
      go1 [Int]
_            = String -> m ((Cluster, Cluster), Distance)
forall a. String -> a
mkErr String
"findMin: empty DistMatrix"

      go2 :: [Int] -> ((Int, Int), Distance) -> m ((Int, Int), Distance)
go2 (Int
i1:is :: [Int]
is@(Int
_:[Int]
_)) !((Int, Int), Distance)
b = Int -> [Int] -> ((Int, Int), Distance) -> m ((Int, Int), Distance)
forall {m :: * -> *}.
MArray (STUArray s) Distance m =>
Int -> [Int] -> ((Int, Int), Distance) -> m ((Int, Int), Distance)
go3 Int
i1 [Int]
is ((Int, Int), Distance)
b m ((Int, Int), Distance)
-> (((Int, Int), Distance) -> m ((Int, Int), Distance))
-> m ((Int, Int), Distance)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [Int] -> ((Int, Int), Distance) -> m ((Int, Int), Distance)
go2 [Int]
is
      go2 [Int]
_              ((Int, Int), Distance)
b = ((Int, Int), Distance) -> m ((Int, Int), Distance)
forall (m :: * -> *) a. Monad m => a -> m a
return ((Int, Int), Distance)
b

      go3 :: Int -> [Int] -> ((Int, Int), Distance) -> m ((Int, Int), Distance)
go3 Int
i1 (Int
i2:[Int]
is) !((Int, Int), Distance)
b = STUArray s (Int, Int) Distance -> (Int, Int) -> m Distance
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> m e
readArray STUArray s (Int, Int) Distance
matrix_ (Int
i1,Int
i2) m Distance
-> (Distance -> m ((Int, Int), Distance))
-> m ((Int, Int), Distance)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Int -> [Int] -> ((Int, Int), Distance) -> m ((Int, Int), Distance)
go3 Int
i1 [Int]
is (((Int, Int), Distance) -> m ((Int, Int), Distance))
-> (Distance -> ((Int, Int), Distance))
-> Distance
-> m ((Int, Int), Distance)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Int, Int), Distance)
-> (Int, Int) -> Distance -> ((Int, Int), Distance)
forall {b} {a}. Ord b => (a, b) -> a -> b -> (a, b)
choose ((Int, Int), Distance)
b (Int
i1,Int
i2)
      go3 Int
_  []       ((Int, Int), Distance)
b = ((Int, Int), Distance) -> m ((Int, Int), Distance)
forall (m :: * -> *) a. Monad m => a -> m a
return ((Int, Int), Distance)
b



-- | Type for functions that calculate distances between
-- clusters.
type ClusterDistance =
       (Cluster, Distance) -- ^ Cluster B1 and distance from A to B1
    -> (Cluster, Distance) -- ^ Cluster B2 and distance from A to B2
    -> Distance            -- ^ Distance from A to (B1 U B2).


-- Some cluster distances
cdistSingleLinkage      :: ClusterDistance
cdistSingleLinkage :: ClusterDistance
cdistSingleLinkage      = \(Cluster
_, Distance
d1) (Cluster
_, Distance
d2) -> Distance
d1 Distance -> Distance -> Distance
forall a. Ord a => a -> a -> a
`min` Distance
d2

cdistCompleteLinkage    :: ClusterDistance
cdistCompleteLinkage :: ClusterDistance
cdistCompleteLinkage    = \(Cluster
_, Distance
d1) (Cluster
_, Distance
d2) -> Distance
d1 Distance -> Distance -> Distance
forall a. Ord a => a -> a -> a
`max` Distance
d2

cdistUPGMA              :: ClusterDistance
cdistUPGMA :: ClusterDistance
cdistUPGMA              = \(Cluster
b1,Distance
d1) (Cluster
b2,Distance
d2) ->
                            let n1 :: Distance
n1 = Int -> Distance
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Cluster -> Int
size Cluster
b1)
                                n2 :: Distance
n2 = Int -> Distance
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Cluster -> Int
size Cluster
b2)
                            in (Distance
n1 Distance -> Distance -> Distance
forall a. Num a => a -> a -> a
* Distance
d1 Distance -> Distance -> Distance
forall a. Num a => a -> a -> a
+ Distance
n2 Distance -> Distance -> Distance
forall a. Num a => a -> a -> a
* Distance
d2) Distance -> Distance -> Distance
forall a. Fractional a => a -> a -> a
/ (Distance
n1 Distance -> Distance -> Distance
forall a. Num a => a -> a -> a
+ Distance
n2)

cdistFakeAverageLinkage :: ClusterDistance
cdistFakeAverageLinkage :: ClusterDistance
cdistFakeAverageLinkage = \(Cluster
_, Distance
d1) (Cluster
_, Distance
d2) -> (Distance
d1 Distance -> Distance -> Distance
forall a. Num a => a -> a -> a
+ Distance
d2) Distance -> Distance -> Distance
forall a. Fractional a => a -> a -> a
/ Distance
2



-- | /O(n)/. Merges two clusters, returning the new cluster and
-- the new distance matrix.
mergeClusters :: ClusterDistance
              -> DistMatrix s
              -> (Cluster, Cluster)
              -> ST s Cluster
mergeClusters :: forall s.
ClusterDistance
-> DistMatrix s -> (Cluster, Cluster) -> ST s Cluster
mergeClusters ClusterDistance
cdist (DM STUArray s (Int, Int) Distance
matrix_ STRef s [Int]
active_ STArray s Int Cluster
clusters_) (Cluster
b1, Cluster
b2) = do
  let (Cluster
bu, Int
kl) = Cluster
b1 Cluster -> Cluster -> (Cluster, Int)
`merge` Cluster
b2
      b1k :: Int
b1k = Cluster -> Int
key Cluster
b1
      b2k :: Int
b2k = Cluster -> Int
key Cluster
b2
      km :: Int
km  = Cluster -> Int
key Cluster
bu
      ix :: b -> b -> (b, b)
ix b
i b
j | b
i b -> b -> Bool
forall a. Ord a => a -> a -> Bool
< b
j     = (b
i,b
j)
             | Bool
otherwise = (b
j,b
i)

  -- Calculate new distances
  [Int]
activeV <- STRef s [Int] -> ST s [Int]
forall s a. STRef s a -> ST s a
readSTRef STRef s [Int]
active_
  [Int] -> (Int -> ST s ()) -> ST s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ ([Int]
activeV [Int] -> [Int] -> [Int]
forall a. Eq a => [a] -> [a] -> [a]
\\ [Int
b1k, Int
b2k]) ((Int -> ST s ()) -> ST s ()) -> (Int -> ST s ()) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \Int
k -> do
      -- a   <- readArray clusters_ k
      Distance
d_a_b1 <- STUArray s (Int, Int) Distance -> (Int, Int) -> ST s Distance
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> m e
readArray STUArray s (Int, Int) Distance
matrix_ ((Int, Int) -> ST s Distance) -> (Int, Int) -> ST s Distance
forall a b. (a -> b) -> a -> b
$ Int -> Int -> (Int, Int)
forall {b}. Ord b => b -> b -> (b, b)
ix Int
k Int
b1k
      Distance
d_a_b2 <- STUArray s (Int, Int) Distance -> (Int, Int) -> ST s Distance
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> m e
readArray STUArray s (Int, Int) Distance
matrix_ ((Int, Int) -> ST s Distance) -> (Int, Int) -> ST s Distance
forall a b. (a -> b) -> a -> b
$ Int -> Int -> (Int, Int)
forall {b}. Ord b => b -> b -> (b, b)
ix Int
k Int
b2k
      let d :: Distance
d = ClusterDistance
cdist (Cluster
b1, Distance
d_a_b1) (Cluster
b2, Distance
d_a_b2)
      STUArray s (Int, Int) Distance -> (Int, Int) -> Distance -> ST s ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> e -> m ()
writeArray STUArray s (Int, Int) Distance
matrix_ (Int -> Int -> (Int, Int)
forall {b}. Ord b => b -> b -> (b, b)
ix Int
k Int
km) (Distance -> ST s ()) -> Distance -> ST s ()
forall a b. (a -> b) -> a -> b
$! Distance
d

  -- Save new cluster, invalidate old one
  STArray s Int Cluster -> Int -> Cluster -> ST s ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> e -> m ()
writeArray STArray s Int Cluster
clusters_ Int
km Cluster
bu
  STArray s Int Cluster -> Int -> Cluster -> ST s ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> i -> e -> m ()
writeArray STArray s Int Cluster
clusters_ Int
kl (Cluster -> ST s ()) -> Cluster -> ST s ()
forall a b. (a -> b) -> a -> b
$ String -> Cluster
forall a. String -> a
mkErr String
"mergeClusters: invalidated"
  STRef s [Int] -> [Int] -> ST s ()
forall s a. STRef s a -> a -> ST s ()
writeSTRef STRef s [Int]
active_ ([Int] -> ST s ()) -> [Int] -> ST s ()
forall a b. (a -> b) -> a -> b
$ Int -> [Int] -> [Int]
forall a. Eq a => a -> [a] -> [a]
delete Int
kl [Int]
activeV

  -- Return new cluster.
  Cluster -> ST s Cluster
forall (m :: * -> *) a. Monad m => a -> m a
return Cluster
bu


-- | Worker function to create dendrograms based on a
-- 'ClusterDistance'.
dendrogram' :: ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
dendrogram' :: forall a.
ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
dendrogram' ClusterDistance
_ []  a -> a -> Distance
_ = String -> Dendrogram a
forall a. String -> a
mkErr String
"dendrogram': empty input list"
dendrogram' ClusterDistance
_ [a
x] a -> a -> Distance
_ = a -> Dendrogram a
forall a. a -> Dendrogram a
Leaf a
x
dendrogram' ClusterDistance
cdist [a]
items a -> a -> Distance
dist = (forall s. ST s (Dendrogram a)) -> Dendrogram a
forall a. (forall s. ST s a) -> a
runST (() -> ST s (Dendrogram a)
forall {p} {s}. p -> ST s (Dendrogram a)
act ())
    where
      n :: Int
n = [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
items
      act :: p -> ST s (Dendrogram a)
act p
_noMonomorphismRestrictionPlease = do
        let xs :: Array Int a
xs = (Int, Int) -> [a] -> Array Int a
forall i e. Ix i => (i, i) -> [e] -> Array i e
listArray (Int
1, Int
n) [a]
items
            im :: IntMap (Dendrogram a)
im = [(Int, Dendrogram a)] -> IntMap (Dendrogram a)
forall a. [(Int, a)] -> IntMap a
IM.fromDistinctAscList ([(Int, Dendrogram a)] -> IntMap (Dendrogram a))
-> [(Int, Dendrogram a)] -> IntMap (Dendrogram a)
forall a b. (a -> b) -> a -> b
$ [Int] -> [Dendrogram a] -> [(Int, Dendrogram a)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
1..] ([Dendrogram a] -> [(Int, Dendrogram a)])
-> [Dendrogram a] -> [(Int, Dendrogram a)]
forall a b. (a -> b) -> a -> b
$ (a -> Dendrogram a) -> [a] -> [Dendrogram a]
forall a b. (a -> b) -> [a] -> [b]
map a -> Dendrogram a
forall a. a -> Dendrogram a
Leaf [a]
items
        (Int -> Int -> Distance) -> Int -> ST s (DistMatrix s)
forall s. (Int -> Int -> Distance) -> Int -> ST s (DistMatrix s)
fromDistance (a -> a -> Distance
dist (a -> a -> Distance) -> (Int -> a) -> Int -> Int -> Distance
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Array Int a
xs Array Int a -> Int -> a
forall i e. Ix i => Array i e -> i -> e
!)) Int
n ST s (DistMatrix s)
-> (DistMatrix s -> ST s (Dendrogram a)) -> ST s (Dendrogram a)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Int -> IntMap (Dendrogram a) -> DistMatrix s -> ST s (Dendrogram a)
forall {t} {a} {s}.
(Eq t, Num t) =>
t -> IntMap (Dendrogram a) -> DistMatrix s -> ST s (Dendrogram a)
go (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) IntMap (Dendrogram a)
im
      go :: t -> IntMap (Dendrogram a) -> DistMatrix s -> ST s (Dendrogram a)
go !t
i !IntMap (Dendrogram a)
ds !DistMatrix s
dm = do
        ((Cluster
c1,Cluster
c2), Distance
distance) <- DistMatrix s -> ST s ((Cluster, Cluster), Distance)
forall s. DistMatrix s -> ST s ((Cluster, Cluster), Distance)
findMin DistMatrix s
dm
        Cluster
cu <- ClusterDistance
-> DistMatrix s -> (Cluster, Cluster) -> ST s Cluster
forall s.
ClusterDistance
-> DistMatrix s -> (Cluster, Cluster) -> ST s Cluster
mergeClusters ClusterDistance
cdist DistMatrix s
dm (Cluster
c1,Cluster
c2)
        let dendro :: Cluster -> IntMap a -> (Maybe a, IntMap a)
dendro Cluster
c = (Int -> a -> Maybe a) -> Int -> IntMap a -> (Maybe a, IntMap a)
forall a.
(Int -> a -> Maybe a) -> Int -> IntMap a -> (Maybe a, IntMap a)
IM.updateLookupWithKey (\Int
_ a
_ -> Maybe a
forall a. Maybe a
Nothing) (Cluster -> Int
key Cluster
c)
            (Just Dendrogram a
d1, !IntMap (Dendrogram a)
ds')  = Cluster
-> IntMap (Dendrogram a)
-> (Maybe (Dendrogram a), IntMap (Dendrogram a))
forall {a}. Cluster -> IntMap a -> (Maybe a, IntMap a)
dendro Cluster
c1 IntMap (Dendrogram a)
ds
            (Just Dendrogram a
d2, !IntMap (Dendrogram a)
ds'') = Cluster
-> IntMap (Dendrogram a)
-> (Maybe (Dendrogram a), IntMap (Dendrogram a))
forall {a}. Cluster -> IntMap a -> (Maybe a, IntMap a)
dendro Cluster
c2 IntMap (Dendrogram a)
ds'
            du :: Dendrogram a
du = Distance -> Dendrogram a -> Dendrogram a -> Dendrogram a
forall a. Distance -> Dendrogram a -> Dendrogram a -> Dendrogram a
Branch Distance
distance Dendrogram a
d1 Dendrogram a
d2
        case t
i of
          t
1 -> Dendrogram a -> ST s (Dendrogram a)
forall (m :: * -> *) a. Monad m => a -> m a
return Dendrogram a
du
          t
_ -> let !ds''' :: IntMap (Dendrogram a)
ds''' = Int
-> Dendrogram a -> IntMap (Dendrogram a) -> IntMap (Dendrogram a)
forall a. Int -> a -> IntMap a -> IntMap a
IM.insert (Cluster -> Int
key Cluster
cu) Dendrogram a
du IntMap (Dendrogram a)
ds''
               in Dendrogram a
du Dendrogram a -> ST s (Dendrogram a) -> ST s (Dendrogram a)
`seq` t -> IntMap (Dendrogram a) -> DistMatrix s -> ST s (Dendrogram a)
go (t
it -> t -> t
forall a. Num a => a -> a -> a
-t
1) IntMap (Dendrogram a)
ds''' DistMatrix s
dm


-- | /O(n^3)/ time and /O(n^2)/ space. Calculates a complete,
-- rooted dendrogram for a list of items using single linkage
-- with the naïve algorithm using a distance matrix.
singleLinkage :: [a] -> (a -> a -> Distance) -> Dendrogram a
singleLinkage :: forall a. [a] -> (a -> a -> Distance) -> Dendrogram a
singleLinkage = ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
forall a.
ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
dendrogram' ClusterDistance
cdistSingleLinkage


-- | /O(n^3)/ time and /O(n^2)/ space. Calculates a complete,
-- rooted dendrogram for a list of items using complete linkage
-- with the naïve algorithm using a distance matrix.
completeLinkage :: [a] -> (a -> a -> Distance) -> Dendrogram a
completeLinkage :: forall a. [a] -> (a -> a -> Distance) -> Dendrogram a
completeLinkage = ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
forall a.
ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
dendrogram' ClusterDistance
cdistCompleteLinkage


-- | /O(n^3)/ time and /O(n^2)/ space. Calculates a complete,
-- rooted dendrogram for a list of items using UPGMA with the
-- naïve algorithm using a distance matrix.
upgma :: [a] -> (a -> a -> Distance) -> Dendrogram a
upgma :: forall a. [a] -> (a -> a -> Distance) -> Dendrogram a
upgma = ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
forall a.
ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
dendrogram' ClusterDistance
cdistUPGMA


-- | /O(n^3)/ time and /O(n^2)/ space. Calculates a complete,
-- rooted dendrogram for a list of items using fake average
-- linkage with the naïve algorithm using a distance matrix.
fakeAverageLinkage :: [a]
                   -> (a -> a -> Distance) -> Dendrogram a
fakeAverageLinkage :: forall a. [a] -> (a -> a -> Distance) -> Dendrogram a
fakeAverageLinkage = ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
forall a.
ClusterDistance -> [a] -> (a -> a -> Distance) -> Dendrogram a
dendrogram' ClusterDistance
cdistFakeAverageLinkage