Subscribed unsubscribe Subscribe Subscribe

Haskellで近傍探索

プログラミング

昨日の続き.型を変えて,searchを追加した.

import List
import Ratio

data Tree a = Leaf [String]
            | Node (String,Ratio Int) (Tree a) (Tree a) deriving Show

(<->) :: (Eq a) => [a] -> [a] -> [Bool]
x <-> y = zipWith (==) x y

tv :: [Bool] -> Ratio Int
tv [] = 0
tv x  = (length . elemIndices True) x % length x

dist :: (Eq a) => [a] -> [a] -> Ratio Int
dist x y = 1 - tv (x <-> y)

sortByDist :: (Ord a) => [a] -> [[a]] -> [[a]]
sortByDist _ []     = []
sortByDist h (x:xs) = sortByDist h [y | y <- xs, dist h y < dist h x]
                    ++ [x]
                    ++ sortByDist h [y | y <- xs, dist h y >= dist h x]

buildTree :: [String] -> Tree a
buildTree x = if length x == 1
              then Leaf x
              else Node (c,r) (buildTree left) (buildTree right)
    where sorted = sortByDist (head x) x
          (left,right) = splitAt (length sorted `div` 2) sorted
          c = head left
          r = dist c (last left)

search :: String -> Ratio Int -> Tree a -> [String]
search x d (Leaf (c:_))   = if dist x c <= d then [c] else []
search x d (Node (c,r) left right)
       | dist c x <= r - d = search x d left
       | dist c x >  r + d = search x d right
       | otherwise         = (search x d left) ++ (search x d right)

-- テストデータ
e = ["010101", "110101", "010010", "111010", "100100",
     "001011", "010010", "000010", "010001", "010010"]

tree = buildTree e -- 探索木

main :: IO ()
main = do putStrLn $ show $ search "010001" (1%6) tree
          putStrLn $ show $ search "010001" (1%12) tree

短く書きたいあまり,tvが悲惨なのには目をつぶることにする.近傍探索と完全一致探索を行う.結果は以下の通り.

["010101","010001"]
["010001"]

これは去年の講義で扱った近傍探索をHaskellで書き直したもの.そのときはLispで書いたが,Haskellの方がデータ型を扱いやすいためかわかりやすく書ける気がする.