Plan 9 from Bell Labs’s /usr/web/sources/contrib/fernan/nhc98/tests/nofib/real/compress/Decode.hs

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


{-
 - Decode.hs
 -
 - Module containing the code to decode LZW encodings
 -
 - Paul Sanders, Applications Research Division, BTL 1992
 -
 - DEC_VERSION 1 uses a list with keys in ascending order as a table, ie.
 - entry n is given by table!!n.
 -
 - DEC_VERSION 2 uses a list with keys in descending order as a table, ie.
 - entry n is given by table!!(#table-n). We don't need to calculate the
 - length of the table however as this is given by the value of the next
 - code to be added.
 -
 - DEC_VERSION 3 uses a balanced binary tree to store the keys. We can do
 - this cheaply by putting the key in the correct place straight away and
 - therefore not doing any rebalancing.
 -}

module Decode (decode)
where

import Prelude hiding( lookup )		-- lookup defined locally
import Defaults
import BinConv

data Optional a = NONE | SOME a deriving (Eq, Show{-was:Text-})

{- We ideally want to store the table as an array but these are inefficient
 - so we use a list instead. We don't use the tree used by encode since we
 - can make use of the fact that all our keys (the codes) come in order and
 - will be placed at the end of the table, at position 'code'.
 -
 - An entry of (SOME n, 'c') indicates that this code has prefix code n
 - and final character c.
 -}


{- Kick off the decoding giving the real function the first code value and
 - the initial table.
 -}

decode :: [Int] -> String
decode []
      = []
decode cs
      = decode' cs first_code init_table

{- decode` decodes the first character which is special since no new code
 - gets added for it. It is also special in so far as we know that the
 - code is a singleton character and thus has prefix NONE. The '@' is a
 - dummy character and can be anything.
 -}

decode' [] _ _ = []
decode' (c:cs) n t
      = ch : do_decode cs n c ch t
        where
        (NONE, ch) = lookup c t

{- do_decode decodes all the codes bar the first. 
 -
 - If the code is in the table (ie the code is less than the next code to be 
 - added) then we output the string for that code (using unfold if a prefix 
 - type) and add a new code to the table with the final character output as 
 - the extension and the previous code as prefix.
 -
 - If the code is not one we know about then we give it to decode_special for
 - special treatment
 -}

do_decode [] _ _ _ _ = []
do_decode (c:cs) n old_n fin_char t
      = if c >= n          -- we don't have this code in the table yet
        then decode_special (c:cs) n old_n fin_char t
        else outchs ++ do_decode cs n' c (head outchs) t'
        where
        outchs = reverse (unfold c (n-1) t)
        (n', t') = if n == max_entries
                   then (n, t)
                   else (n+1, insert n (SOME old_n, head outchs) t)

{- decode_special decodes a code that isn't in the table.
 -
 - The algorithm in Welch describes why this works, suffice it to say that
 - the output string is given by the last character output and the string
 - given by the previous code. An entry is also made in the table for the
 - last character output and the old code.
 -}

decode_special (c:cs) n old_n fin_char t
      = outchs ++ do_decode cs n' c (head outchs) t'
        where
        outchs = reverse (fin_char : unfold old_n (n-1) t)
        (n', t') = if n == max_entries
                   then (n, t)
                   else (n+1, insert n (SOME old_n, fin_char) t)

{- unfold a prefix code.
 -
 - chain back through the prefixes outputting the extension characters as we
 - go.
 -}

unfold n t_len t
      = if prefix == NONE
        then [c]
        else c : unfold n' t_len t
        where
        (prefix, c) = lookup n t
        SOME n' = prefix

data DecompTable = Branch DecompTable DecompTable | Leaf (Optional Int, Char) deriving (Show{-was:Text-})

{- Insert a code pair into the table. The position of the code is given by
 - the breakdown of the key into its binary digits
 -}

insert n v t = insert' (dec_to_binx code_bits n) v t

{- We can place a code exactly where it belongs using the following algorithm.
 - Take the code's binary rep expanded to the maximum number of bits. Start
 - at the first bit, if a 0 then insert the code to the left, if a 1 then
 - insert to the right. Carry on with the other bits until we run out and are
 - thus at the right place and can construct the node.
 -}

insert' [] v (Leaf _)
      = Leaf v
insert' ('0' : bs) v (Branch l r)
      = Branch (insert' bs v l) r
insert' ('1' : bs) v (Branch l r)
      = Branch l (insert' bs v r)
insert' ('0' : bs) v t
      = Branch (insert' bs v t) t
insert' ('1' : bs) v t
      = Branch t (insert' bs v t)

{- For a lookup we use the same mechanism to locate the position of the item
 - in the tree but if we find that the route has not been constructed or the
 - node has the dummy value then that code is not yet in the tree. The way
 - in which the decode algorithm works this should never happen.
 -}

lookup n t = lookup' (dec_to_binx code_bits n) t

lookup' [] (Leaf v)
      = v
lookup' ('0' : bs) (Branch l _)
      = lookup' bs l
lookup' ('1' : bs) (Branch _ r)
      = lookup' bs r
lookup' _  _ = error "tree insert error - seek professional help"

init_table = mk_init_table 0 (Leaf (SOME 99999, '@'))

mk_init_table 256 t = t
mk_init_table n t = mk_init_table (n+1) (insert n (NONE, toEnum n) t)


Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].