{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Pandoc.Readers.LaTeX.Table
  ( tableEnvironments )
where

import Data.Functor (($>))
import Text.Pandoc.Class
import Text.Pandoc.Readers.LaTeX.Parsing
import Text.Pandoc.TeX
import Text.Pandoc.Builder as B
import qualified Data.Map as M
import Data.Text (Text)
import Data.Maybe (fromMaybe)
import qualified Data.Text as T
import Control.Applicative ((<|>), optional, many)
import Control.Monad (when, void)
import Text.Pandoc.Shared (safeRead, trim)
import Text.Pandoc.Logging (LogMessage(SkippedContent))
import Text.Pandoc.Walk (walkM)
import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
                            optional, space, spaces, withRaw, (<|>))

tableEnvironments :: PandocMonad m
                  => LP m Blocks
                  -> LP m Inlines
                  -> M.Map Text (LP m Blocks)
tableEnvironments blocks inline =
  M.fromList
  [ ("longtable",  env "longtable" $
          resetCaption *>
            simpTable blocks inline "longtable" False >>= addTableCaption)
  , ("table",  env "table" $
          skipopts *> resetCaption *> blocks >>= addTableCaption)
  , ("tabular*", env "tabular*" $ simpTable blocks inline "tabular*" True)
  , ("tabularx", env "tabularx" $ simpTable blocks inline "tabularx" True)
  , ("tabular", env "tabular"  $ simpTable blocks inline "tabular" False)
  ]

hline :: PandocMonad m => LP m ()
hline = try $ do
  spaces
  hasParenArg <-
      (False <$ controlSeq "hline") <|>
      (False <$ controlSeq "cline") <|>
      (True <$ controlSeq "cmidrule") <|>
      -- booktabs rules:
      (True <$ controlSeq "toprule") <|>
      (True <$ controlSeq "bottomrule") <|>
      (True <$ controlSeq "midrule") <|>
      (True <$ controlSeq "endhead") <|>
      (True <$ controlSeq "endfirsthead")
  optional rawopt
  when hasParenArg $ void $ optional (parenWrapped (() <$ singleChar))

lbreak :: PandocMonad m => LP m Tok
lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
         <* skipopts <* spaces

amp :: PandocMonad m => LP m Tok
amp = symbol '&'

-- Split a Word into individual Symbols (for parseAligns)
splitWordTok :: PandocMonad m => LP m ()
splitWordTok = do
  TokStream macrosExpanded inp <- getInput
  case inp of
       (Tok spos Word t : rest) ->
         setInput $ TokStream macrosExpanded
                  $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
       _ -> return ()

parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
parseAligns = try $ do
  let maybeBar = skipMany
        (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
  let cAlign = AlignCenter <$ symbol 'c'
  let lAlign = AlignLeft <$ symbol 'l'
  let rAlign = AlignRight <$ symbol 'r'
  let parAlign = AlignLeft <$ symbol 'p'
  -- aligns from tabularx
  let xAlign = AlignLeft <$ symbol 'X'
  let mAlign = AlignLeft <$ symbol 'm'
  let bAlign = AlignLeft <$ symbol 'b'
  let alignChar = splitWordTok *> (  cAlign <|> lAlign <|> rAlign <|> parAlign
                                 <|> xAlign <|> mAlign <|> bAlign )
  let alignPrefix = symbol '>' >> braced
  let alignSuffix = symbol '<' >> braced
  let colWidth = try $ do
        symbol '{'
        ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
        spaces
        symbol '}'
        return $ safeRead ds
  let alignSpec = do
        pref <- option [] alignPrefix
        spaces
        al <- alignChar
        width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
                                                 pos <- getPosition
                                                 report $ SkippedContent s pos
                                                 return Nothing)
        spaces
        suff <- option [] alignSuffix
        return (al, width, (pref, suff))
  let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
        symbol '*'
        spaces
        ds <- trim . untokenize <$> bracedOrToken
        spaces
        spec <- bracedOrToken
        case safeRead ds of
             Just n  -> do
               TokStream _ ts <- getInput
               setInput $ TokStream False (mconcat (replicate n spec) ++ ts)
             Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
  bgroup
  spaces
  maybeBar
  aligns' <- many $ try $ spaces >> optional starAlign >>
                            (alignSpec <* maybeBar)
  spaces
  egroup
  spaces
  return $ map toSpec aligns'
  where
    toColWidth (Just w) | w > 0 = ColWidth w
    toColWidth _                = ColWidthDefault
    toSpec (x, y, z) = (x, toColWidth y, z)

-- N.B. this parser returns a Row that may have erroneous empty cells
-- in it. See the note above fixTableHead for details.
parseTableRow :: PandocMonad m
              => LP m Blocks -- ^ block parser
              -> LP m Inlines -- ^ inline parser
              -> Text   -- ^ table environment name
              -> [([Tok], [Tok])] -- ^ pref/suffixes
              -> LP m Row
parseTableRow blocks inline envname prefsufs = do
  notFollowedBy (spaces *> end_ envname)
  -- contexts that can contain & that is not colsep:
  let canContainAmp (Tok _ (CtrlSeq "begin") _) = True
      canContainAmp (Tok _ (CtrlSeq "verb") _)  = True
      canContainAmp (Tok _ (CtrlSeq "Verb") _)  = True
      canContainAmp _       = False
  -- add prefixes and suffixes in token stream:
  let celltoks (pref, suff) = do
        prefpos <- getPosition
        contents <- mconcat <$>
            many ( snd <$> withRaw
                     ((lookAhead (controlSeq "parbox") >>
                       void blocks) -- #5711
                      <|>
                      (lookAhead (satisfyTok canContainAmp) >> void inline)
                      <|>
                      (lookAhead (symbol '$') >> void inline))
                  <|>
                   (do notFollowedBy
                         (() <$ amp <|> () <$ lbreak <|> end_ envname)
                       count 1 anyTok) )

        suffpos <- getPosition
        option [] (count 1 amp)
        return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
  rawcells <- mapM celltoks prefsufs
  cells <- mapM (parseFromToks (parseTableCell blocks)) rawcells
  spaces
  return $ Row nullAttr cells

parseTableCell :: PandocMonad m => LP m Blocks -> LP m Cell
parseTableCell blocks = do
  spaces
  updateState $ \st -> st{ sInTableCell = True }
  cell' <-   multicolumnCell blocks
         <|> multirowCell blocks
         <|> parseSimpleCell
         <|> parseEmptyCell
  updateState $ \st -> st{ sInTableCell = False }
  spaces
  return cell'
  where
    -- The parsing of empty cells is important in LaTeX, especially when dealing
    -- with multirow/multicolumn. See #6603.
    parseEmptyCell = spaces $> emptyCell
    parseSimpleCell = simpleCell <$> (plainify <$> blocks)


cellAlignment :: PandocMonad m => LP m Alignment
cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
  where
    alignment = do
      c <- untoken <$> singleChar <* optional braced -- ignore args
      return $ case c of
        "l" -> AlignLeft
        "r" -> AlignRight
        "c" -> AlignCenter
        "*" -> AlignDefault
        _   -> AlignDefault

plainify :: Blocks -> Blocks
plainify bs = case toList bs of
                [Para ils] -> plain (fromList ils)
                _          -> bs

multirowCell :: PandocMonad m => LP m Blocks -> LP m Cell
multirowCell blocks = controlSeq "multirow" >> do
  -- Full prototype for \multirow macro is:
  --     \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
  -- However, everything except `nrows` and `text` make
  -- sense in the context of the Pandoc AST
  _ <- optional $ symbol '[' *> cellAlignment <* symbol ']'   -- vertical position
  nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- bigstrut-related
  _ <- symbol '{' *> manyTill anyTok (symbol '}')             -- Cell width
  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- Length used for fine-tuning
  content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
  return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content

multicolumnCell :: PandocMonad m => LP m Blocks -> LP m Cell
multicolumnCell blocks = controlSeq "multicolumn" >> do
  span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
  alignment <- symbol '{' *> cellAlignment <* symbol '}'

  let singleCell = do
        content <- plainify <$> blocks
        return $ cell alignment (RowSpan 1) (ColSpan span') content

  -- Two possible contents: either a \multirow cell, or content.
  -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
  -- Note that a \multirow cell can be nested in a \multicolumn,
  -- but not the other way around. See #6603
  let nestedCell = do
        (Cell _ _ (RowSpan rs) _ bs) <- multirowCell blocks
        return $ cell
                  alignment
                  (RowSpan rs)
                  (ColSpan span')
                  (fromList bs)

  symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'

-- LaTeX tables are stored with empty cells underneath multirow cells
-- denoting the grid spaces taken up by them. More specifically, if a
-- cell spans m rows, then it will overwrite all the cells in the
-- columns it spans for (m-1) rows underneath it, requiring padding
-- cells in these places. These padding cells need to be removed for
-- proper table reading. See #6603.
--
-- These fixTable functions do not otherwise fix up malformed
-- input tables: that is left to the table builder.
fixTableHead :: TableHead -> TableHead
fixTableHead (TableHead attr rows) = TableHead attr rows'
  where
    rows' = fixTableRows rows

fixTableBody :: TableBody -> TableBody
fixTableBody (TableBody attr rhc th tb)
  = TableBody attr rhc th' tb'
  where
    th' = fixTableRows th
    tb' = fixTableRows tb

fixTableRows :: [Row] -> [Row]
fixTableRows = fixTableRows' $ repeat Nothing
  where
    fixTableRows' oldHang (Row attr cells : rs)
      = let (newHang, cells') = fixTableRow oldHang cells
            rs'               = fixTableRows' newHang rs
        in Row attr cells' : rs'
    fixTableRows' _ [] = []

-- The overhang is represented as Just (relative cell dimensions) or
-- Nothing for an empty grid space.
fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
fixTableRow oldHang cells
  -- If there's overhang, drop cells until their total width meets the
  -- width of the occupied grid spaces (or we run out)
  | (n, prefHang, restHang) <- splitHang oldHang
  , n > 0
  = let cells' = dropToWidth getCellW n cells
        (restHang', cells'') = fixTableRow restHang cells'
    in (prefHang restHang', cells'')
  -- Otherwise record the overhang of a pending cell and fix the rest
  -- of the row
  | c@(Cell _ _ h w _):cells' <- cells
  = let h' = max 1 h
        w' = max 1 w
        oldHang' = dropToWidth getHangW w' oldHang
        (newHang, cells'') = fixTableRow oldHang' cells'
    in (toHang w' h' <> newHang, c : cells'')
  | otherwise
  = (oldHang, [])
  where
    getCellW (Cell _ _ _ w _) = w
    getHangW = maybe 1 fst
    getCS (ColSpan n) = n

    toHang c r
      | r > 1     = [Just (c, r)]
      | otherwise = replicate (getCS c) Nothing

    -- Take the prefix of the overhang list representing filled grid
    -- spaces. Also return the remainder and the length of this prefix.
    splitHang = splitHang' 0 id

    splitHang' !n l (Just (c, r):xs)
      = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
    splitHang' n l xs = (n, l, xs)

    -- Drop list items until the total width of the dropped items
    -- exceeds the passed width.
    dropToWidth _     n l | n < 1 = l
    dropToWidth wproj n (c:cs)    = dropToWidth wproj (n - wproj c) cs
    dropToWidth _     _ []        = []

simpTable :: PandocMonad m
          => LP m Blocks
          -> LP m Inlines
          -> Text
          -> Bool
          -> LP m Blocks
simpTable blocks inline envname hasWidthParameter = try $ do
  when hasWidthParameter $ () <$ tokWith inline
  skipopts
  colspecs <- parseAligns
  let (aligns, widths, prefsufs) = unzip3 colspecs
  optional $ controlSeq "caption" *> setCaption inline
  spaces
  optional label
  spaces
  optional lbreak
  spaces
  skipMany hline
  spaces
  header' <- option [] . try . fmap (:[]) $
             parseTableRow blocks inline envname prefsufs <*
               lbreak <* many1 hline
  spaces
  rows <- sepEndBy (parseTableRow blocks inline envname prefsufs)
                    (lbreak <* optional (skipMany hline))
  spaces
  optional $ controlSeq "caption" *> setCaption inline
  spaces
  optional label
  spaces
  optional lbreak
  spaces
  lookAhead $ controlSeq "end" -- make sure we're at end
  let th  = fixTableHead $ TableHead nullAttr header'
  let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
  let tf  = TableFoot nullAttr []
  return $ table emptyCaption (zip aligns widths) th tbs tf

addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
addTableCaption = walkM go
  where go (Table attr c spec th tb tf) = do
          st <- getState
          let capt = fromMaybe c $ sCaption st
          let mblabel = sLastLabel st
          case mblabel of
            Nothing -> return ()
            Just lab -> do
                     num <- getNextNumber sLastTableNum
                     setState
                       st{ sLastTableNum = num
                         , sLabels = M.insert lab
                                    [Str (renderDottedNum num)]
                                    (sLabels st) }
          -- add num to caption?
          let attr' = case (attr, mblabel) of
                        ((_,classes,kvs), Just ident) ->
                           (ident,classes,kvs)
                        _ -> attr
          return $ addAttrDiv attr'
                 $ maybe id removeLabel mblabel
                 $ Table nullAttr capt spec th tb tf
        go x = return x

-- TODO: For now we add a Div to contain table attributes, since
-- most writers don't do anything yet with attributes on Table.
-- This can be removed when that changes.
addAttrDiv :: Attr -> Block -> Block
addAttrDiv ("",[],[]) b = b
addAttrDiv attr b       = Div attr [b]
