{-# LANGUAGE OverloadedStrings #-}
module Text.XmlHtml.XML.Parse where
import Control.Applicative
import Control.Monad
import Data.Char
import Data.List
import Data.Maybe
import Text.XmlHtml.Common
import Text.XmlHtml.TextParser
import qualified Text.Parsec as P
import Data.Map (Map)
import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
docFragment :: Encoding -> Parser Document
docFragment :: Encoding -> Parser Document
docFragment e :: Encoding
e = do
(dt :: Maybe DocType
dt, nodes1 :: [Node]
nodes1) <- Parser (Maybe DocType, [Node])
prolog
[Node]
nodes2 <- Parser [Node]
content
Document -> Parser Document
forall (m :: * -> *) a. Monad m => a -> m a
return (Document -> Parser Document) -> Document -> Parser Document
forall a b. (a -> b) -> a -> b
$ Encoding -> Maybe DocType -> [Node] -> Document
XmlDocument Encoding
e Maybe DocType
dt ([Node]
nodes1 [Node] -> [Node] -> [Node]
forall a. [a] -> [a] -> [a]
++ [Node]
nodes2)
whiteSpace :: Parser ()
whiteSpace :: Parser ()
whiteSpace = ParsecT Text () Identity Char -> ParsecT Text () Identity [Char]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some ((Char -> Bool) -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [' ','\t','\r','\n'])) ParsecT Text () Identity [Char] -> Parser () -> Parser ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> () -> Parser ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
isNameStartChar :: Char -> Bool
isNameStartChar :: Char -> Bool
isNameStartChar c :: Char
c | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== ':' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '_' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= 'a' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= 'z' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= 'A' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= 'Z' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '\xc0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\xd6' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '\xd8' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\xf6' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '\xf8' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\x2ff' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '\x370' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\x37d' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '\x37f' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\x1fff' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '\x200c' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\x200d' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '\x2070' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\x218f' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '\x2c00' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\x2fef' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '\x3001' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\xd7ff' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '\xf900' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\xfdcf' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '\xfdf0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\xfffd' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '\x10000' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\xeffff' = Bool
True
| Bool
otherwise = Bool
False
isNameChar :: Char -> Bool
isNameChar :: Char -> Bool
isNameChar c :: Char
c | Char -> Bool
isNameStartChar Char
c = Bool
True
| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '-' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '.' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '\xb7' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '9' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '\x300' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\x36f' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '\x203f' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\x2040' = Bool
True
| Bool
otherwise = Bool
False
name :: Parser Text
name :: Parser Text
name = do
Char
c <- (Char -> Bool) -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy Char -> Bool
isNameStartChar
Text
r <- (Char -> Bool) -> Parser Text
takeWhile0 Char -> Bool
isNameChar
Text -> Parser Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text) -> Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Char -> Text -> Text
T.cons Char
c Text
r
attrValue :: Parser Text
attrValue :: Parser Text
attrValue = ([Text] -> Text) -> ParsecT Text () Identity [Text] -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [Text] -> Text
T.concat (ParsecT Text () Identity [Text]
singleQuoted ParsecT Text () Identity [Text]
-> ParsecT Text () Identity [Text]
-> ParsecT Text () Identity [Text]
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Text () Identity [Text]
doubleQuoted)
where
singleQuoted :: ParsecT Text () Identity [Text]
singleQuoted = Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\'' ParsecT Text () Identity Char
-> ParsecT Text () Identity [Text]
-> ParsecT Text () Identity [Text]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> [Char] -> ParsecT Text () Identity [Text]
forall (t :: * -> *).
Foldable t =>
t Char -> ParsecT Text () Identity [Text]
refTill ['<','&','\''] ParsecT Text () Identity [Text]
-> ParsecT Text () Identity Char -> ParsecT Text () Identity [Text]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\''
doubleQuoted :: ParsecT Text () Identity [Text]
doubleQuoted = Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '"' ParsecT Text () Identity Char
-> ParsecT Text () Identity [Text]
-> ParsecT Text () Identity [Text]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> [Char] -> ParsecT Text () Identity [Text]
forall (t :: * -> *).
Foldable t =>
t Char -> ParsecT Text () Identity [Text]
refTill ['<','&','"'] ParsecT Text () Identity [Text]
-> ParsecT Text () Identity Char -> ParsecT Text () Identity [Text]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '"'
refTill :: t Char -> ParsecT Text () Identity [Text]
refTill end :: t Char
end = Parser Text -> ParsecT Text () Identity [Text]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many ((Char -> Bool) -> Parser Text
takeWhile1 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> t Char -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
end)) Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
reference)
systemLiteral :: Parser Text
systemLiteral :: Parser Text
systemLiteral = Parser Text
singleQuoted Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
doubleQuoted
where
singleQuoted :: Parser Text
singleQuoted = do
Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\''
Text
x <- (Char -> Bool) -> Parser Text
takeWhile0 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '\''))
Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\''
Text -> Parser Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
x
doubleQuoted :: Parser Text
doubleQuoted = do
Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\"'
Text
x <- (Char -> Bool) -> Parser Text
takeWhile0 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '\"'))
Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\"'
Text -> Parser Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
x
pubIdLiteral :: Parser Text
pubIdLiteral :: Parser Text
pubIdLiteral = Parser Text
singleQuoted Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
doubleQuoted
where
singleQuoted :: Parser Text
singleQuoted = do
Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\''
Text
x <- (Char -> Bool) -> Parser Text
takeWhile0 (\c :: Char
c -> Char -> Bool
isPubIdChar Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= '\'')
Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\''
Text -> Parser Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
x
doubleQuoted :: Parser Text
doubleQuoted = do
Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\"'
Text
x <- (Char -> Bool) -> Parser Text
takeWhile0 Char -> Bool
isPubIdChar
Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\"'
Text -> Parser Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
x
isPubIdChar :: Char -> Bool
isPubIdChar :: Char -> Bool
isPubIdChar c :: Char
c | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= 'a' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= 'z' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= 'A' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= 'Z' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '9' = Bool
True
| Char
c Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char]
otherChars = Bool
True
| Bool
otherwise = Bool
False
where
otherChars :: [Char]
otherChars = " \r\n-\'()+,./:=?;!*#@$_%" :: [Char]
charData :: Parser Node
charData :: Parser Node
charData = Text -> Node
TextNode (Text -> Node) -> Parser Text -> Parser Node
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeWhile1 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ['<','&']))
comment :: Parser (Maybe Node)
= Text -> Parser Text
text "<!--" Parser Text -> Parser (Maybe Node) -> Parser (Maybe Node)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Node -> Maybe Node
forall a. a -> Maybe a
Just (Node -> Maybe Node) -> (Text -> Node) -> Text -> Maybe Node
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Node
Comment (Text -> Maybe Node) -> Parser Text -> Parser (Maybe Node)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
commentText) Parser (Maybe Node) -> Parser Text -> Parser (Maybe Node)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Text -> Parser Text
text "-->"
where
commentText :: Parser Text
commentText = ([Text] -> Text) -> ParsecT Text () Identity [Text] -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [Text] -> Text
T.concat (ParsecT Text () Identity [Text] -> Parser Text)
-> ParsecT Text () Identity [Text] -> Parser Text
forall a b. (a -> b) -> a -> b
$ Parser Text -> ParsecT Text () Identity [Text]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parser Text -> ParsecT Text () Identity [Text])
-> Parser Text -> ParsecT Text () Identity [Text]
forall a b. (a -> b) -> a -> b
$
Parser Text
nonDash Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text -> Parser Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (Char -> Text -> Text
T.cons (Char -> Text -> Text)
-> ParsecT Text () Identity Char
-> ParsecT Text () Identity (Text -> Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '-' ParsecT Text () Identity (Text -> Text)
-> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Text
nonDash)
nonDash :: Parser Text
nonDash = (Char -> Bool) -> Parser Text
takeWhile1 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '-'))
processingInstruction :: Parser (Maybe Node)
processingInstruction :: Parser (Maybe Node)
processingInstruction = do
Text
_ <- Text -> Parser Text
text "<?"
()
_ <- Parser ()
piTarget
[Char]
_ <- ParsecT Text () Identity [Char]
forall u. ParsecT Text u Identity [Char]
emptyEnd ParsecT Text () Identity [Char]
-> ParsecT Text () Identity [Char]
-> ParsecT Text () Identity [Char]
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Text () Identity [Char]
contentEnd
Maybe Node -> Parser (Maybe Node)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Node
forall a. Maybe a
Nothing
where
emptyEnd :: ParsecT Text u Identity [Char]
emptyEnd = ParsecT Text u Identity [Char] -> ParsecT Text u Identity [Char]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try ([Char] -> ParsecT Text u Identity [Char]
forall s (m :: * -> *) u.
Stream s m Char =>
[Char] -> ParsecT s u m [Char]
P.string "?>")
contentEnd :: ParsecT Text () Identity [Char]
contentEnd = ParsecT Text () Identity [Char] -> ParsecT Text () Identity [Char]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (ParsecT Text () Identity [Char]
-> ParsecT Text () Identity [Char])
-> ParsecT Text () Identity [Char]
-> ParsecT Text () Identity [Char]
forall a b. (a -> b) -> a -> b
$ do
()
_ <- Parser ()
whiteSpace
ParsecT Text () Identity Char
-> Parser Text -> ParsecT Text () Identity [Char]
forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
P.manyTill ParsecT Text () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
P.anyChar (Parser Text -> Parser Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Text -> Parser Text
text "?>")
piTarget :: Parser ()
piTarget :: Parser ()
piTarget = do
Text
n <- Parser Text
name
Bool -> Parser () -> Parser ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ((Char -> Char) -> Text -> Text
T.map Char -> Char
toLower Text
n Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== "xml") (Parser () -> Parser ()) -> Parser () -> Parser ()
forall a b. (a -> b) -> a -> b
$ [Char] -> Parser ()
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail "xml declaration can't occur here"
cdata :: [Char] -> Parser a -> Parser Node
cdata :: [Char] -> Parser a -> Parser Node
cdata cs :: [Char]
cs end :: Parser a
end = Text -> Node
TextNode (Text -> Node) -> ([Text] -> Text) -> [Text] -> Node
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Text] -> Text
T.concat ([Text] -> Node) -> ParsecT Text () Identity [Text] -> Parser Node
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text -> Parser a -> ParsecT Text () Identity [Text]
forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
P.manyTill Parser Text
part Parser a
end
where part :: Parser Text
part = (Char -> Bool) -> Parser Text
takeWhile1 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char]
cs))
Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Text
T.singleton (Char -> Text) -> ParsecT Text () Identity Char -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Text () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
P.anyChar
cdSect :: Parser (Maybe Node)
cdSect :: Parser (Maybe Node)
cdSect = Node -> Maybe Node
forall a. a -> Maybe a
Just (Node -> Maybe Node) -> Parser Node -> Parser (Maybe Node)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> do
Text
_ <- Text -> Parser Text
text "<![CDATA["
[Char] -> Parser Text -> Parser Node
forall a. [Char] -> Parser a -> Parser Node
cdata "]" (Text -> Parser Text
text "]]>")
prolog :: Parser (Maybe DocType, [Node])
prolog :: Parser (Maybe DocType, [Node])
prolog = do
Maybe (Maybe Text)
_ <- ParsecT Text () Identity (Maybe Text)
-> ParsecT Text () Identity (Maybe (Maybe Text))
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional ParsecT Text () Identity (Maybe Text)
xmlDecl
[Maybe Node]
nodes1 <- Parser (Maybe Node) -> ParsecT Text () Identity [Maybe Node]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser (Maybe Node)
misc
Maybe (DocType, [Maybe Node])
rest <- ParsecT Text () Identity (DocType, [Maybe Node])
-> ParsecT Text () Identity (Maybe (DocType, [Maybe Node]))
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (ParsecT Text () Identity (DocType, [Maybe Node])
-> ParsecT Text () Identity (Maybe (DocType, [Maybe Node])))
-> ParsecT Text () Identity (DocType, [Maybe Node])
-> ParsecT Text () Identity (Maybe (DocType, [Maybe Node]))
forall a b. (a -> b) -> a -> b
$ do
DocType
dt <- Parser DocType
docTypeDecl
[Maybe Node]
nodes2 <- Parser (Maybe Node) -> ParsecT Text () Identity [Maybe Node]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser (Maybe Node)
misc
(DocType, [Maybe Node])
-> ParsecT Text () Identity (DocType, [Maybe Node])
forall (m :: * -> *) a. Monad m => a -> m a
return (DocType
dt, [Maybe Node]
nodes2)
case Maybe (DocType, [Maybe Node])
rest of
Nothing -> (Maybe DocType, [Node]) -> Parser (Maybe DocType, [Node])
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe DocType
forall a. Maybe a
Nothing, [Maybe Node] -> [Node]
forall a. [Maybe a] -> [a]
catMaybes [Maybe Node]
nodes1)
Just (dt :: DocType
dt, nodes2 :: [Maybe Node]
nodes2) -> (Maybe DocType, [Node]) -> Parser (Maybe DocType, [Node])
forall (m :: * -> *) a. Monad m => a -> m a
return (DocType -> Maybe DocType
forall a. a -> Maybe a
Just DocType
dt, [Maybe Node] -> [Node]
forall a. [Maybe a] -> [a]
catMaybes ([Maybe Node]
nodes1 [Maybe Node] -> [Maybe Node] -> [Maybe Node]
forall a. [a] -> [a] -> [a]
++ [Maybe Node]
nodes2))
xmlDecl :: Parser (Maybe Text)
xmlDecl :: ParsecT Text () Identity (Maybe Text)
xmlDecl = do
Text
_ <- Text -> Parser Text
text "<?xml"
()
_ <- Parser ()
versionInfo
Maybe Text
e <- Parser Text -> ParsecT Text () Identity (Maybe Text)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Text
encodingDecl
Maybe ()
_ <- Parser () -> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
sdDecl
Maybe ()
_ <- Parser () -> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace
Text
_ <- Text -> Parser Text
text "?>"
Maybe Text -> ParsecT Text () Identity (Maybe Text)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Text
e
versionInfo :: Parser ()
versionInfo :: Parser ()
versionInfo = do
Parser ()
whiteSpace Parser () -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Text -> Parser Text
text "version" Parser Text -> Parser () -> Parser ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
eq Parser () -> Parser () -> Parser ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Parser ()
singleQuoted Parser () -> Parser () -> Parser ()
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ()
doubleQuoted)
where
singleQuoted :: Parser ()
singleQuoted = Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\'' ParsecT Text () Identity Char -> Parser () -> Parser ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
versionNum Parser () -> ParsecT Text () Identity Char -> Parser ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\''
doubleQuoted :: Parser ()
doubleQuoted = Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\"' ParsecT Text () Identity Char -> Parser () -> Parser ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
versionNum Parser () -> ParsecT Text () Identity Char -> Parser ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\"'
versionNum :: Parser ()
versionNum = do
Text
_ <- Text -> Parser Text
text "1."
[Char]
_ <- ParsecT Text () Identity Char -> ParsecT Text () Identity [Char]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some ((Char -> Bool) -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\c :: Char
c -> Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '9'))
() -> Parser ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
eq :: Parser ()
eq :: Parser ()
eq = Parser () -> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace ParsecT Text () Identity (Maybe ())
-> ParsecT Text () Identity Char -> ParsecT Text () Identity Char
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '=' ParsecT Text () Identity Char
-> ParsecT Text () Identity (Maybe ())
-> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser () -> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace ParsecT Text () Identity (Maybe ()) -> Parser () -> Parser ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> () -> Parser ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
misc :: Parser (Maybe Node)
misc :: Parser (Maybe Node)
misc = Parser (Maybe Node)
comment Parser (Maybe Node) -> Parser (Maybe Node) -> Parser (Maybe Node)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (Maybe Node)
processingInstruction Parser (Maybe Node) -> Parser (Maybe Node) -> Parser (Maybe Node)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser ()
whiteSpace Parser () -> Parser (Maybe Node) -> Parser (Maybe Node)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Maybe Node -> Parser (Maybe Node)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe Node
forall a. Maybe a
Nothing)
docTypeDecl :: Parser DocType
docTypeDecl :: Parser DocType
docTypeDecl = do
Text
_ <- Text -> Parser Text
text "<!DOCTYPE"
Parser ()
whiteSpace
Text
tag <- Parser Text
name
Maybe ()
_ <- Parser () -> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace
ExternalID
extid <- Parser ExternalID
externalID
Maybe ()
_ <- Parser () -> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace
InternalSubset
intsub <- Parser InternalSubset
internalDoctype
Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '>'
DocType -> Parser DocType
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> ExternalID -> InternalSubset -> DocType
DocType Text
tag ExternalID
extid InternalSubset
intsub)
data InternalDoctypeState = IDSStart
| IDSScanning Int
| IDSInQuote Int Char
| Int
| Int
| Int
| Int
| Int
| Int
internalDoctype :: Parser InternalSubset
internalDoctype :: Parser InternalSubset
internalDoctype = Text -> InternalSubset
InternalText (Text -> InternalSubset)
-> ([Char] -> Text) -> [Char] -> InternalSubset
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Char] -> Text
T.pack ([Char] -> InternalSubset)
-> ParsecT Text () Identity [Char] -> Parser InternalSubset
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> ScanState) -> ParsecT Text () Identity [Char]
scanText (InternalDoctypeState -> Char -> ScanState
dfa InternalDoctypeState
IDSStart)
Parser InternalSubset
-> Parser InternalSubset -> Parser InternalSubset
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> InternalSubset -> Parser InternalSubset
forall (m :: * -> *) a. Monad m => a -> m a
return InternalSubset
NoInternalSubset
where dfa :: InternalDoctypeState -> Char -> ScanState
dfa IDSStart '[' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning 0))
dfa IDSStart _ = [Char] -> ScanState
ScanFail "Not a DOCTYPE internal subset"
dfa (IDSInQuote n :: Int
n c :: Char
c) d :: Char
d
| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
d = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
| Bool
otherwise = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n Char
c))
dfa (IDSScanning n :: Int
n) '[' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+1)))
dfa (IDSScanning 0) ']' = ScanState
ScanFinish
dfa (IDSScanning n :: Int
n) ']' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1)))
dfa (IDSScanning n :: Int
n) '\'' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n '\''))
dfa (IDSScanning n :: Int
n) '\"' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n '\"'))
dfa (IDSScanning n :: Int
n) '<' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSCommentS1 Int
n))
dfa (IDSScanning n :: Int
n) _ = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
dfa (IDSCommentS1 n :: Int
n) '[' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+1)))
dfa (IDSCommentS1 0) ']' = ScanState
ScanFinish
dfa (IDSCommentS1 n :: Int
n) ']' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1)))
dfa (IDSCommentS1 n :: Int
n) '\'' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n '\''))
dfa (IDSCommentS1 n :: Int
n) '\"' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n '\"'))
dfa (IDSCommentS1 n :: Int
n) '!' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSCommentS2 Int
n))
dfa (IDSCommentS1 n :: Int
n) _ = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
dfa (IDSCommentS2 n :: Int
n) '[' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+1)))
dfa (IDSCommentS2 0) ']' = ScanState
ScanFinish
dfa (IDSCommentS2 n :: Int
n) ']' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1)))
dfa (IDSCommentS2 n :: Int
n) '\'' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n '\''))
dfa (IDSCommentS2 n :: Int
n) '\"' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n '\"'))
dfa (IDSCommentS2 n :: Int
n) '-' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSCommentS3 Int
n))
dfa (IDSCommentS2 n :: Int
n) _ = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
dfa (IDSCommentS3 n :: Int
n) '[' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+1)))
dfa (IDSCommentS3 0) ']' = ScanState
ScanFinish
dfa (IDSCommentS3 n :: Int
n) ']' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1)))
dfa (IDSCommentS3 n :: Int
n) '\'' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n '\''))
dfa (IDSCommentS3 n :: Int
n) '\"' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> Char -> InternalDoctypeState
IDSInQuote Int
n '\"'))
dfa (IDSCommentS3 n :: Int
n) '-' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSComment Int
n))
dfa (IDSCommentS3 n :: Int
n) _ = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
dfa (IDSComment n :: Int
n) '-' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSCommentD1 Int
n))
dfa (IDSComment n :: Int
n) _ = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSComment Int
n))
dfa (IDSCommentD1 n :: Int
n) '-' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSCommentE1 Int
n))
dfa (IDSCommentD1 n :: Int
n) _ = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSComment Int
n))
dfa (IDSCommentE1 n :: Int
n) '>' = (Char -> ScanState) -> ScanState
ScanNext (InternalDoctypeState -> Char -> ScanState
dfa (Int -> InternalDoctypeState
IDSScanning Int
n))
dfa (IDSCommentE1 _) _ = [Char] -> ScanState
ScanFail "Poorly formatted comment"
sdDecl :: Parser ()
sdDecl :: Parser ()
sdDecl = do
Text
_ <- Parser Text -> Parser Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Parser ()
whiteSpace Parser () -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Text -> Parser Text
text "standalone"
Parser ()
eq
Text
_ <- Parser Text
single Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
double
() -> Parser ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
where
single :: Parser Text
single = Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\'' ParsecT Text () Identity Char -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
yesno Parser Text -> ParsecT Text () Identity Char -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\''
double :: Parser Text
double = Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\"' ParsecT Text () Identity Char -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
yesno Parser Text -> ParsecT Text () Identity Char -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\"'
yesno :: Parser Text
yesno = Text -> Parser Text
text "yes" Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> Parser Text
text "no"
element :: Parser Node
element :: Parser Node
element = do
(t :: Text
t,a :: [(Text, Text)]
a,b :: Bool
b) <- Parser (Text, [(Text, Text)], Bool)
emptyOrStartTag
if Bool
b then Node -> Parser Node
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [(Text, Text)] -> [Node] -> Node
Element Text
t [(Text, Text)]
a [])
else Text -> [(Text, Text)] -> Parser Node
nonEmptyElem Text
t [(Text, Text)]
a
where
nonEmptyElem :: Text -> [(Text, Text)] -> Parser Node
nonEmptyElem t :: Text
t a :: [(Text, Text)]
a = do
[Node]
c <- Parser [Node]
content
Text -> Parser ()
endTag Text
t
Node -> Parser Node
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [(Text, Text)] -> [Node] -> Node
Element Text
t [(Text, Text)]
a [Node]
c)
emptyOrStartTag :: Parser (Text, [(Text, Text)], Bool)
emptyOrStartTag :: Parser (Text, [(Text, Text)], Bool)
emptyOrStartTag = do
Text
t <- Parser Text -> Parser Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '<' ParsecT Text () Identity Char -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
name
[(Text, Text)]
a <- ParsecT Text () Identity (Text, Text)
-> ParsecT Text () Identity [(Text, Text)]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (ParsecT Text () Identity (Text, Text)
-> ParsecT Text () Identity [(Text, Text)])
-> ParsecT Text () Identity (Text, Text)
-> ParsecT Text () Identity [(Text, Text)]
forall a b. (a -> b) -> a -> b
$ ParsecT Text () Identity (Text, Text)
-> ParsecT Text () Identity (Text, Text)
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (ParsecT Text () Identity (Text, Text)
-> ParsecT Text () Identity (Text, Text))
-> ParsecT Text () Identity (Text, Text)
-> ParsecT Text () Identity (Text, Text)
forall a b. (a -> b) -> a -> b
$ do
Parser ()
whiteSpace
ParsecT Text () Identity (Text, Text)
attribute
Bool -> Parser () -> Parser ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ([(Text, Text)] -> Bool
forall a b. Eq a => [(a, b)] -> Bool
hasDups [(Text, Text)]
a) (Parser () -> Parser ()) -> Parser () -> Parser ()
forall a b. (a -> b) -> a -> b
$ [Char] -> Parser ()
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail "Duplicate attribute names in element"
Maybe ()
_ <- Parser () -> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace
Maybe Char
e <- ParsecT Text () Identity Char
-> ParsecT Text () Identity (Maybe Char)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '/')
Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '>'
(Text, [(Text, Text)], Bool) -> Parser (Text, [(Text, Text)], Bool)
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
t, [(Text, Text)]
a, Maybe Char -> Bool
forall a. Maybe a -> Bool
isJust Maybe Char
e)
where
hasDups :: [(a, b)] -> Bool
hasDups a :: [(a, b)]
a = [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([a] -> [a]
forall a. Eq a => [a] -> [a]
nub (((a, b) -> a) -> [(a, b)] -> [a]
forall a b. (a -> b) -> [a] -> [b]
map (a, b) -> a
forall a b. (a, b) -> a
fst [(a, b)]
a)) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< [(a, b)] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(a, b)]
a
attribute :: Parser (Text, Text)
attribute :: ParsecT Text () Identity (Text, Text)
attribute = do
Text
n <- Parser Text
name
Parser ()
eq
Text
v <- Parser Text
attrValue
(Text, Text) -> ParsecT Text () Identity (Text, Text)
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
n,Text
v)
endTag :: Text -> Parser ()
endTag :: Text -> Parser ()
endTag s :: Text
s = do
Text
_ <- Text -> Parser Text
text "</"
Text
t <- Parser Text
name
Bool -> Parser () -> Parser ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Text
s Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= Text
t) (Parser () -> Parser ()) -> Parser () -> Parser ()
forall a b. (a -> b) -> a -> b
$ [Char] -> Parser ()
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> Parser ()) -> [Char] -> Parser ()
forall a b. (a -> b) -> a -> b
$ "mismatched tags: </" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
t [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++
"> found inside <" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
s [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ "> tag"
Maybe ()
_ <- Parser () -> ParsecT Text () Identity (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ()
whiteSpace
Text
_ <- Text -> Parser Text
text ">"
() -> Parser ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
content :: Parser [Node]
content :: Parser [Node]
content = do
Maybe Node
n <- Parser Node -> Parser (Maybe Node)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Node
charData
[Maybe Node]
ns <- ([[Maybe Node]] -> [Maybe Node])
-> ParsecT Text () Identity [[Maybe Node]]
-> ParsecT Text () Identity [Maybe Node]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [[Maybe Node]] -> [Maybe Node]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (ParsecT Text () Identity [[Maybe Node]]
-> ParsecT Text () Identity [Maybe Node])
-> ParsecT Text () Identity [[Maybe Node]]
-> ParsecT Text () Identity [Maybe Node]
forall a b. (a -> b) -> a -> b
$ ParsecT Text () Identity [Maybe Node]
-> ParsecT Text () Identity [[Maybe Node]]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (ParsecT Text () Identity [Maybe Node]
-> ParsecT Text () Identity [[Maybe Node]])
-> ParsecT Text () Identity [Maybe Node]
-> ParsecT Text () Identity [[Maybe Node]]
forall a b. (a -> b) -> a -> b
$ do
Maybe Node
s <- ((Node -> Maybe Node
forall a. a -> Maybe a
Just (Node -> Maybe Node) -> (Text -> Node) -> Text -> Maybe Node
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Node
TextNode (Text -> Maybe Node) -> Parser Text -> Parser (Maybe Node)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
reference)
Parser (Maybe Node) -> Parser (Maybe Node) -> Parser (Maybe Node)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (Maybe Node)
cdSect
Parser (Maybe Node) -> Parser (Maybe Node) -> Parser (Maybe Node)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (Maybe Node)
processingInstruction
Parser (Maybe Node) -> Parser (Maybe Node) -> Parser (Maybe Node)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (Maybe Node)
comment
Parser (Maybe Node) -> Parser (Maybe Node) -> Parser (Maybe Node)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Node -> Maybe Node) -> Parser Node -> Parser (Maybe Node)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Node -> Maybe Node
forall a. a -> Maybe a
Just Parser Node
element)
Maybe Node
t <- Parser Node -> Parser (Maybe Node)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Node
charData
[Maybe Node] -> ParsecT Text () Identity [Maybe Node]
forall (m :: * -> *) a. Monad m => a -> m a
return [Maybe Node
s,Maybe Node
t]
[Node] -> Parser [Node]
forall (m :: * -> *) a. Monad m => a -> m a
return ([Node] -> Parser [Node]) -> [Node] -> Parser [Node]
forall a b. (a -> b) -> a -> b
$ [Node] -> [Node]
coalesceText ([Node] -> [Node]) -> [Node] -> [Node]
forall a b. (a -> b) -> a -> b
$ [Maybe Node] -> [Node]
forall a. [Maybe a] -> [a]
catMaybes (Maybe Node
nMaybe Node -> [Maybe Node] -> [Maybe Node]
forall a. a -> [a] -> [a]
:[Maybe Node]
ns)
where
coalesceText :: [Node] -> [Node]
coalesceText (TextNode s :: Text
s : TextNode t :: Text
t : ns :: [Node]
ns)
= [Node] -> [Node]
coalesceText (Text -> Node
TextNode (Text -> Text -> Text
T.append Text
s Text
t) Node -> [Node] -> [Node]
forall a. a -> [a] -> [a]
: [Node]
ns)
coalesceText (n :: Node
n:ns :: [Node]
ns)
= Node
n Node -> [Node] -> [Node]
forall a. a -> [a] -> [a]
: [Node] -> [Node]
coalesceText [Node]
ns
coalesceText []
= []
charRef :: Parser Text
charRef :: Parser Text
charRef = Parser Text
hexCharRef Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
decCharRef
where
decCharRef :: Parser Text
decCharRef = do
Text
_ <- Text -> Parser Text
text "&#"
[Int]
ds <- ParsecT Text () Identity Int -> ParsecT Text () Identity [Int]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some ParsecT Text () Identity Int
forall u. ParsecT Text u Identity Int
digit
Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char ';'
let c :: Char
c = Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ (Int -> Int -> Int) -> Int -> [Int] -> Int
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\a :: Int
a b :: Int
b -> 10 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
a Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
b) 0 [Int]
ds
Bool -> Parser () -> Parser ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Bool -> Bool
not (Char -> Bool
isValidChar Char
c)) (Parser () -> Parser ()) -> Parser () -> Parser ()
forall a b. (a -> b) -> a -> b
$ [Char] -> Parser ()
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> Parser ()) -> [Char] -> Parser ()
forall a b. (a -> b) -> a -> b
$
"Reference is not a valid character"
Text -> Parser Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text) -> Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Char -> Text
T.singleton Char
c
where
digit :: ParsecT Text u Identity Int
digit = do
Char
d <- (Char -> Bool) -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\c :: Char
c -> Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '9')
Int -> ParsecT Text u Identity Int
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Int
ord Char
d Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord '0')
hexCharRef :: Parser Text
hexCharRef = do
Text
_ <- Text -> Parser Text
text "&#x"
[Int]
ds <- ParsecT Text () Identity Int -> ParsecT Text () Identity [Int]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some ParsecT Text () Identity Int
forall u. ParsecT Text u Identity Int
digit
Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char ';'
let c :: Char
c = Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ (Int -> Int -> Int) -> Int -> [Int] -> Int
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\a :: Int
a b :: Int
b -> 16 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
a Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
b) 0 [Int]
ds
Bool -> Parser () -> Parser ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Bool -> Bool
not (Char -> Bool
isValidChar Char
c)) (Parser () -> Parser ()) -> Parser () -> Parser ()
forall a b. (a -> b) -> a -> b
$ [Char] -> Parser ()
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> Parser ()) -> [Char] -> Parser ()
forall a b. (a -> b) -> a -> b
$
"Reference is not a valid character"
Text -> Parser Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text) -> Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Char -> Text
T.singleton Char
c
where
digit :: ParsecT Text u Identity Int
digit = ParsecT Text u Identity Int
forall u. ParsecT Text u Identity Int
num ParsecT Text u Identity Int
-> ParsecT Text u Identity Int -> ParsecT Text u Identity Int
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Text u Identity Int
forall u. ParsecT Text u Identity Int
upper ParsecT Text u Identity Int
-> ParsecT Text u Identity Int -> ParsecT Text u Identity Int
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Text u Identity Int
forall u. ParsecT Text u Identity Int
lower
num :: ParsecT Text u Identity Int
num = do
Char
d <- (Char -> Bool) -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\c :: Char
c -> Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '9')
Int -> ParsecT Text u Identity Int
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Int
ord Char
d Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord '0')
upper :: ParsecT Text u Identity Int
upper = do
Char
d <- (Char -> Bool) -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\c :: Char
c -> Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= 'A' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= 'F')
Int -> ParsecT Text u Identity Int
forall (m :: * -> *) a. Monad m => a -> m a
return (10 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Char -> Int
ord Char
d Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord 'A')
lower :: ParsecT Text u Identity Int
lower = do
Char
d <- (Char -> Bool) -> ParsecT Text u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy (\c :: Char
c -> Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= 'a' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= 'f')
Int -> ParsecT Text u Identity Int
forall (m :: * -> *) a. Monad m => a -> m a
return (10 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Char -> Int
ord Char
d Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord 'a')
reference :: Parser Text
reference :: Parser Text
reference = Parser Text
charRef Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
entityRef
entityRef :: Parser Text
entityRef :: Parser Text
entityRef = do
Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '&'
Text
n <- Parser Text
name
Char
_ <- Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char ';'
case Text -> Map Text Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
n Map Text Text
entityRefLookup of
Nothing -> [Char] -> Parser Text
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> Parser Text) -> [Char] -> Parser Text
forall a b. (a -> b) -> a -> b
$ "Unknown entity reference: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
n
Just t :: Text
t -> Text -> Parser Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
t
where
entityRefLookup :: Map Text Text
entityRefLookup :: Map Text Text
entityRefLookup = [(Text, Text)] -> Map Text Text
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList [
("amp", "&"),
("lt", "<"),
("gt", ">"),
("apos", "\'"),
("quot", "\"")
]
externalID :: Parser ExternalID
externalID :: Parser ExternalID
externalID = Parser ExternalID
systemID Parser ExternalID -> Parser ExternalID -> Parser ExternalID
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ExternalID
publicID Parser ExternalID -> Parser ExternalID -> Parser ExternalID
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ExternalID -> Parser ExternalID
forall (m :: * -> *) a. Monad m => a -> m a
return ExternalID
NoExternalID
where
systemID :: Parser ExternalID
systemID = do
Text
_ <- Text -> Parser Text
text "SYSTEM"
Parser ()
whiteSpace
(Text -> ExternalID) -> Parser Text -> Parser ExternalID
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> ExternalID
System Parser Text
systemLiteral
publicID :: Parser ExternalID
publicID = do
Text
_ <- Text -> Parser Text
text "PUBLIC"
Parser ()
whiteSpace
Text
pid <- Parser Text
pubIdLiteral
Parser ()
whiteSpace
Text
sid <- Parser Text
systemLiteral
ExternalID -> Parser ExternalID
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Text -> ExternalID
Public Text
pid Text
sid)
encodingDecl :: Parser Text
encodingDecl :: Parser Text
encodingDecl = do
Text
_ <- Parser Text -> Parser Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
P.try (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Parser ()
whiteSpace Parser () -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Text -> Parser Text
text "encoding"
()
_ <- Parser ()
eq
Parser Text
singleQuoted Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
doubleQuoted
where
singleQuoted :: Parser Text
singleQuoted = Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\'' ParsecT Text () Identity Char -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
encName Parser Text -> ParsecT Text () Identity Char -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\''
doubleQuoted :: Parser Text
doubleQuoted = Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\"' ParsecT Text () Identity Char -> Parser Text -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
encName Parser Text -> ParsecT Text () Identity Char -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char '\"'
encName :: Parser Text
encName = do
Char
c <- (Char -> Bool) -> ParsecT Text () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
P.satisfy Char -> Bool
isEncStart
Text
cs <- (Char -> Bool) -> Parser Text
takeWhile0 Char -> Bool
isEnc
Text -> Parser Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Text -> Text
T.cons Char
c Text
cs)
isEncStart :: Char -> Bool
isEncStart c :: Char
c | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= 'A' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= 'Z' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= 'a' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= 'z' = Bool
True
| Bool
otherwise = Bool
False
isEnc :: Char -> Bool
isEnc c :: Char
c | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= 'A' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= 'Z' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= 'a' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= 'z' = Bool
True
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '9' = Bool
True
| Char
c Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ['.','_','-'] = Bool
True
| Bool
otherwise = Bool
False