Выбор правильного результата разбора ReadP

Я пытаюсь разобрать адрес электронной почты RFC5322. Мой парсер работает в том смысле, что среди результатов один из них правильный. Однако как мне выбрать "правильный" результат?

Учитывая строку Foo Bar <foo@bar.com>мой парсер должен выдавать значение Address (Just "Foo Bar") "foo@bar.com",

В качестве альтернативы, учитывая строку foo@bar.comмой парсер должен выдавать значение Address Nothing "foo@bar.com",

Значение с включенным именем является предпочтительным.

Мой парсер выглядит так:

import           Control.Applicative
import           Data.Char
import qualified Data.Text                     as T
import           Text.ParserCombinators.ReadP

onlyEmail :: ReadP Address
onlyEmail = do
  skipSpaces
  email <- many1 $ satisfy isAscii
  skipSpaces
  return $ Address Nothing (T.pack email)

withName :: ReadP Address
withName = do
  skipSpaces
  name <- many1 (satisfy isAscii)
  skipSpaces
  email <- between (char '<') (char '>') (many1 $ satisfy isAscii)
  skipSpaces
  return $ Address (Just $ T.pack name) (T.pack email)

rfc5322 :: ReadP Address
rfc5322 = withName <|> onlyEmail

Когда я запускаю парсер с readP_to_S rfc5322 "Foo Bar <foo@bar.com>", он дает следующие результаты:

[ (Address {addressName = Nothing, addressEmail = "F"},"oo Bar <foo@bar.com>")
, (Address {addressName = Nothing, addressEmail = "Fo"},"o Bar <foo@bar.com>")
, (Address {addressName = Nothing, addressEmail = "Foo"},"Bar <foo@bar.com>")
, (Address {addressName = Nothing, addressEmail = "Foo "},"Bar <foo@bar.com>")
, (Address {addressName = Nothing, addressEmail = "Foo B"},"ar <foo@bar.com>")
, (Address {addressName = Nothing, addressEmail = "Foo Ba"},"r <foo@bar.com>")
, (Address {addressName = Nothing, addressEmail = "Foo Bar"},"<foo@bar.com>")
, (Address {addressName = Nothing, addressEmail = "Foo Bar "},"<foo@bar.com>")
, (Address {addressName = Nothing, addressEmail = "Foo Bar <"},"foo@bar.com>")
, (Address {addressName = Nothing, addressEmail = "Foo Bar <f"},"oo@bar.com>")
, (Address {addressName = Nothing, addressEmail = "Foo Bar <fo"},"o@bar.com>")
, (Address {addressName = Nothing, addressEmail = "Foo Bar <foo"},"@bar.com>")
, (Address {addressName = Nothing, addressEmail = "Foo Bar <foo@"},"bar.com>")
, (Address {addressName = Nothing, addressEmail = "Foo Bar <foo@b"},"ar.com>")
, (Address {addressName = Nothing, addressEmail = "Foo Bar <foo@ba"},"r.com>")
, (Address {addressName = Nothing, addressEmail = "Foo Bar <foo@bar"},".com>")
, (Address {addressName = Nothing, addressEmail = "Foo Bar <foo@bar."},"com>")
, (Address {addressName = Nothing, addressEmail = "Foo Bar <foo@bar.c"},"om>")
, (Address {addressName = Nothing, addressEmail = "Foo Bar <foo@bar.co"},"m>")
, (Address {addressName = Nothing, addressEmail = "Foo Bar <foo@bar.com"},">")
, (Address {addressName = Just "Foo Bar", addressEmail = "foo@bar.com"},"")
, (Address {addressName = Just "Foo Bar ", addressEmail = "foo@bar.com"},"")
, (Address {addressName = Nothing, addressEmail = "Foo Bar <foo@bar.com>"},"")
]

В этом случае результат, который я на самом деле хочу, появляется в списке третьим-последним. Как мне выразить это предпочтение?

1 ответ

Решение

Вы не должны делать предпочтения. Ваша проблема в том, что ваши частичные парсеры принимают больший набор строк, чем на самом деле нужно.

Например, мое решение:

import           Control.Bool
import           Control.Applicative
import           Data.Char
import qualified Data.Text                     as T
import           Data.Text (Text)
import           Text.ParserCombinators.ReadP

email :: ReadP Text
email = do
    l <- part
    a <- char '@'
    d <- part
    return . T.pack $ l ++ a:d
  where
    part = munch1 (isAscii <&&> (/='@') <&&> (/='<') <&&> (/='>'))

name :: ReadP Text
name = T.pack <$> chainr1 part sep
  where
    part = munch1 (isAlpha <||> isDigit <||> (=='\''))
    sep  = (\xs ys -> xs ++ ' ':ys) <$ munch1 (==' ')

onlyEmail :: ReadP Address
onlyEmail = Address Nothing <$> email

withName :: ReadP Address
withName = do
    n <- name
    skipSpaces
    e <- between (char '<') (char '>') email
    return $ Address (Just n) e

address :: ReadP Address
address = skipSpaces *> (withName <|> onlyEmail)

main = print $ readP_to_S address "Foo Bar <foo@bar.com>"

Будет напечатано:

[(Address (Just "Foo Bar") "foo@bar.com","")]
Другие вопросы по тегам