diff options
author | Raúl Benencia <rul@kalgan.cc> | 2013-07-28 16:05:26 -0300 |
---|---|---|
committer | Raúl Benencia <rul@kalgan.cc> | 2013-07-28 16:05:26 -0300 |
commit | 180c539ec1fc6b2f698951842da8486638a03c4c (patch) | |
tree | 853834ee7dd72dccb63a34fb7c189e0a8e912ae7 /Rfc1342.hs |
Simple module to decode RFC1342 strings
Diffstat (limited to 'Rfc1342.hs')
-rw-r--r-- | Rfc1342.hs | 65 |
1 files changed, 65 insertions, 0 deletions
diff --git a/Rfc1342.hs b/Rfc1342.hs new file mode 100644 index 0000000..2699a26 --- /dev/null +++ b/Rfc1342.hs @@ -0,0 +1,65 @@ +-- A simple RFC1342 decoder +-- Copyright (C) 2013 Raúl Benencia <rul@kalgan.cc> +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU General Public License as published by +-- the Free Software Foundation, either version 3 of the License, or +-- (at your option) any later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. +-- +-- You should have received a copy of the GNU General Public License +-- along with this program. If not, see <http://www.gnu.org/licenses/>. + +module Rfc1342 (decodeField) where +-- This module is an ugly hack. You've been warned. + +import qualified Codec.Binary.Base64 as B64 +import qualified Codec.Binary.QuotedPrintable as QP + +import Data.Char (toLower, isSpace, chr) +import Data.List(isPrefixOf) +import Data.Word (Word8) + +import Data.Encoding(decodeString) + +-- Encoding imports. If you want to support more encodings, just add'em here. 
import Data.Encoding.UTF8
import Data.Encoding.ISO88591
import Data.Encoding.ISO88598
import Data.Encoding.ISO88599

-- | Decode every RFC 1342 encoded word (@=?charset?encoding?text?=@) found
-- in a header field and leave all other text untouched.
--
-- * Whitespace is dropped only when it separates two adjacent encoded words;
--   whitespace between an encoded word and ordinary text is preserved, so
--   @"=?utf-8?q?Re:?= hello"@ yields @"Re: hello"@.
-- * A @"=?"@ that does not introduce a well-formed encoded word (missing
--   separators, unknown encoding letter) is copied through literally instead
--   of crashing or discarding the remainder of the field.
decodeField :: String -> String
decodeField [] = []
decodeField ('=':'?':cs) =
  case decodeWord cs of
    Just (decoded, rest) -> decoded ++ decodeField (skipSeparator rest)
    -- Not an encoded word after all: keep the two characters and go on.
    Nothing              -> '=' : '?' : decodeField cs
decodeField (c:cs) = c : decodeField cs

-- | Try to decode a single encoded word. The argument is the text that
-- follows the leading @"=?"@. On success the result is the decoded text
-- paired with whatever follows the encoded word.
decodeWord :: String -> Maybe (String, String)
decodeWord cs =
  case break (== '?') cs of
    (charset, '?':enc:'?':body) ->
      decodeWithCharset (charsetDecoder charset) enc body
    _ -> Nothing

-- | Select the charset decoder for a (case-insensitive) charset name.
-- Unknown charsets fall back to 'id', i.e. the bytes are shown as they are.
-- If you want to support more encodings, add them here (and import them).
charsetDecoder :: String -> String -> String
charsetDecoder charset =
  case map toLower charset of
    "utf-8"      -> decodeString UTF8
    "iso-8859-1" -> decodeString ISO88591
    "iso-8859-8" -> decodeString ISO88598
    "iso-8859-9" -> decodeString ISO88599
    _            -> id

-- | Decode the payload of an encoded word.
--
-- @dec@ is the charset decoder, @enc@ the encoding letter (@b@ for Base64,
-- @q@ for quoted-printable, case-insensitive) and @body@ the text after the
-- second @?@ separator. Returns 'Nothing' for an unknown encoding letter.
-- A missing @"?="@ terminator is tolerated: the payload then runs up to the
-- next @?@ (or the end of the input). A payload that the transfer decoder
-- rejects contributes the empty string (see 'unwrap').
decodeWithCharset :: (String -> String) -> Char -> String -> Maybe (String, String)
decodeWithCharset dec enc body =
  case toLower enc of
    'b' -> Just (decodeWith B64.decode encoded, rest)
    -- Only the Q encoding gives '_' a special meaning, and only in the raw
    -- (still encoded) text, hence the substitution happens before decoding.
    'q' -> Just (decodeWith (QP.decode . _2spc) encoded, rest)
    _   -> Nothing
  where
    (encoded, rest') = break (== '?') body
    rest
      | "?=" `isPrefixOf` rest' = drop 2 rest'
      | otherwise               = rest'
    decodeWith datadec = dec . unwrap . datadec

-- | Drop the whitespace that separates two adjacent encoded words; in every
-- other situation the text is returned unchanged.
skipSeparator :: String -> String
skipSeparator rest
  | "=?" `isPrefixOf` afterSpace = afterSpace
  | otherwise                    = rest
  where
    afterSpace = dropWhile isSpace rest

-- | Turn the result of a transfer decoder into a byte string held in a
-- 'String'; a failed decode yields the empty string.
unwrap :: Maybe [Word8] -> String
unwrap Nothing    = []
unwrap (Just str) = bytesToString str

-- | Map every byte to the character with the same code point.
bytesToString :: [Word8] -> String
bytesToString = map (chr . fromIntegral)

-- | In the Q encoding an underscore represents the SPACE character.
-- It is rewritten to the quoted-printable escape @=20@ so that the
-- quoted-printable decoder produces the space itself, while an escaped
-- underscore (@=5F@) still decodes to a real underscore.
_2spc :: String -> String
_2spc = concatMap (\x -> if x == '_' then "=20" else [x])
\ No newline at end of file |