commit ghc-xlsx-tabular for openSUSE:Factory
Hello community,
here is the log from the commit of package ghc-xlsx-tabular for openSUSE:Factory checked in at 2017-03-03 17:52:40
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/ghc-xlsx-tabular (Old)
and /work/SRC/openSUSE:Factory/.ghc-xlsx-tabular.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "ghc-xlsx-tabular"
Fri Mar 3 17:52:40 2017 rev:2 rq:461700 version:0.2.2
Changes:
--------
--- /work/SRC/openSUSE:Factory/ghc-xlsx-tabular/ghc-xlsx-tabular.changes 2016-12-10 18:26:31.780362197 +0100
+++ /work/SRC/openSUSE:Factory/.ghc-xlsx-tabular.new/ghc-xlsx-tabular.changes 2017-03-03 17:52:41.220035600 +0100
@@ -1,0 +2,5 @@
+Sun Feb 12 14:17:29 UTC 2017 - psimons@suse.com
+
+- Update to version 0.2.2 with cabal2obs.
+
+-------------------------------------------------------------------
Old:
----
xlsx-tabular-0.1.0.1.tar.gz
New:
----
xlsx-tabular-0.2.2.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ ghc-xlsx-tabular.spec ++++++
--- /var/tmp/diff_new_pack.gk2Dxh/_old 2017-03-03 17:52:41.955931662 +0100
+++ /var/tmp/diff_new_pack.gk2Dxh/_new 2017-03-03 17:52:41.959931096 +0100
@@ -1,7 +1,7 @@
#
# spec file for package ghc-xlsx-tabular
#
-# Copyright (c) 2016 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany.
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -18,15 +18,14 @@
%global pkg_name xlsx-tabular
Name: ghc-%{pkg_name}
-Version: 0.1.0.1
+Version: 0.2.2
Release: 0
-Summary: Xlsx table decode utility
+Summary: Xlsx table cell value extraction utility
License: BSD-3-Clause
-Group: System/Libraries
+Group: Development/Languages/Other
Url: https://hackage.haskell.org/package/%{pkg_name}
Source0: https://hackage.haskell.org/package/%{pkg_name}-%{version}/%{pkg_name}-%{version}.tar.gz
BuildRequires: ghc-Cabal-devel
-# Begin cabal-rpm deps:
BuildRequires: ghc-aeson-devel
BuildRequires: ghc-bytestring-devel
BuildRequires: ghc-containers-devel
@@ -36,10 +35,12 @@
BuildRequires: ghc-text-devel
BuildRequires: ghc-xlsx-devel
BuildRoot: %{_tmppath}/%{name}-%{version}-build
-# End cabal-rpm deps
%description
-Please see README.md.
+Convenience utility to read xlsx tabular cells.
+
+You can extract the values from xlsx files table rows to JSON format by the
+heuristics or your custom function.
%package devel
Summary: Haskell %{pkg_name} library development files
@@ -55,15 +56,12 @@
%prep
%setup -q -n %{pkg_name}-%{version}
-
%build
%ghc_lib_build
-
%install
%ghc_lib_install
-
%post devel
%ghc_pkg_recache
++++++ xlsx-tabular-0.1.0.1.tar.gz -> xlsx-tabular-0.2.2.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xlsx-tabular-0.1.0.1/src/Codec/Xlsx/Util/Tabular/Imports.hs new/xlsx-tabular-0.2.2/src/Codec/Xlsx/Util/Tabular/Imports.hs
--- old/xlsx-tabular-0.1.0.1/src/Codec/Xlsx/Util/Tabular/Imports.hs 2016-06-08 14:59:26.000000000 +0200
+++ new/xlsx-tabular-0.2.2/src/Codec/Xlsx/Util/Tabular/Imports.hs 2016-10-29 17:27:57.000000000 +0200
@@ -14,6 +14,7 @@
, Text
, IntSet
, IntSet.fromList
+ , IntSet.member
, FromJSON, parseJSON
, ToJSON, toJSON
, Value(Object), object
@@ -25,7 +26,7 @@
)
where
-import Codec.Xlsx as X hiding (fromList)
+import Codec.Xlsx as X
import Codec.Xlsx.Formatted as X
import Codec.Xlsx.Util.Tabular.Types as X
import Control.Applicative ((<$>), (<*>))
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xlsx-tabular-0.1.0.1/src/Codec/Xlsx/Util/Tabular/Json.hs new/xlsx-tabular-0.2.2/src/Codec/Xlsx/Util/Tabular/Json.hs
--- old/xlsx-tabular-0.1.0.1/src/Codec/Xlsx/Util/Tabular/Json.hs 2016-03-09 08:43:26.000000000 +0100
+++ new/xlsx-tabular-0.2.2/src/Codec/Xlsx/Util/Tabular/Json.hs 2017-01-16 12:40:38.000000000 +0100
@@ -16,7 +16,7 @@
instance FromJSON RichTextRun where
parseJSON (Object v) =
- RichTextRun <$> pure Nothing <*> (v .: "text")
+ RichTextRun <$> return Nothing <*> (v .: "text")
deriveJSON defaultOptions
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xlsx-tabular-0.1.0.1/src/Codec/Xlsx/Util/Tabular.hs new/xlsx-tabular-0.2.2/src/Codec/Xlsx/Util/Tabular.hs
--- old/xlsx-tabular-0.1.0.1/src/Codec/Xlsx/Util/Tabular.hs 2016-03-09 08:10:01.000000000 +0100
+++ new/xlsx-tabular-0.2.2/src/Codec/Xlsx/Util/Tabular.hs 2016-11-09 15:20:31.000000000 +0100
@@ -1,7 +1,35 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
--- | Convinience utility to read Xlsx tabular cells.
+
+{- | Convenience utility to read Xlsx tabular cells.
+
+The majority of the @toTableRows*@ functions assume that the table
+of interest consists of contiguous rows styled with border lines
+surrounding all cells, with possible text above and below the table
+that is not of interest. Like so:
+
+@
+Some documentation here....
+---------------------------
+| Header1 | Header2 | ... |
+---------------------------
+| Value1 | Value2 | ... |
+---------------------------
+| Value1 | Value2 | ... |
+---------------------------
+Maybe some annoying text here, I don't care about.
+@
+
+The heuristic used for table row selection in these functions is
+that any table rows will have a bottom border line.
+
+If the above heuristic is not valid for your table you can instead
+provide your own row selection predicate to the `toTableRowsCustom`
+function. For example, the predicate @\\_ _ -> True@ (or @(const
+. const) True@) will select all contiguous rows.
+
+-}
module Codec.Xlsx.Util.Tabular
(
-- * Types
@@ -18,167 +46,148 @@
-- ** TabularRow
, tabularRowIx
, tabularRowCells
- -- * Methods
- , def
-- * Functions
, toTableRowsFromFile
, toTableRows
, toTableRows'
+ -- * Custom row predicates
+ , toTableRowsCustom
) where
import Codec.Xlsx.Util.Tabular.Imports
import qualified Data.ByteString.Lazy as ByteString
-type Rows =
- [(Int, Cols)]
+type Row = (Int, Cols)
+
+type Rows = [(Int, Cols)] -- [Row]
-type Cols =
- [(Int, Cell)]
+type Cols = [(Int, Cell)]
-type RowValues =
- [(Int, [(Int, Maybe CellValue)])]
+type RowValues = [(Int, [(Int, Maybe CellValue)])]
+-- | A @RowPredicate@ is given the Xlsx "StyleSheet" as well as the
+-- row itself (consisting of the row's index and the row's cells) and
+-- should return @True@ if the row is part of the table and false
+-- otherwise.
+type RowPredicate = StyleSheet -> Row -> Bool
--- |Read from Xlsx file as tabular rows
+-- |Read tabular rows from the first sheet of an Xlsx file.
+-- The table is assumed to consist of all contiguous rows
+-- that have bottom border lines, starting with the header.
toTableRowsFromFile :: Int -- ^ Starting row index (header row)
-> String -- ^ File name
-> IO (Maybe Tabular)
-toTableRowsFromFile offset fname = do
- s <- ByteString.readFile fname
- let xlsx = toXlsx s
- rows = toTableRows' xlsx offset
- pure rows
+toTableRowsFromFile offset fname =
+ flip toTableRows' offset . toXlsx <$> ByteString.readFile fname
-- |Decode cells as tabular rows.
+-- The table is assumed to consist of all contiguous rows
+-- that have bottom border lines, starting with the header.
toTableRows :: Xlsx -- ^ Xlsx Workbook
-> Text -- ^ Worksheet name to decode
-> Int -- ^ Starting row index (header row)
-> Maybe Tabular
-toTableRows xlsx sheetName offset =
- decodeRows <$> styles <*> Just offset <*> rows
- where
- styles = parseStyleSheet (xlsx ^. xlStyles) ^? _Right
- rows =
- xlsx
- ^? ixSheet sheetName
- . wsCells
- . to toRows
+toTableRows = toTableRowsCustom borderBottomPredicate
--- |Decode cells as tabular rows from first sheet.
+-- |Decode cells from first sheet as tabular rows.
+-- The table is assumed to consist of all contiguous rows
+-- that have bottom border lines, starting with the header.
toTableRows' :: Xlsx -- ^ Xlsx Workbook
-> Int -- ^ Starting row index (header row)
-> Maybe Tabular
-toTableRows' xlsx offset =
- toTableRows xlsx firstSheetName offset
+toTableRows' xlsx = toTableRows xlsx firstSheetName
where
- firstSheetName =
- xlsx ^. xlSheets
- & keys
- & head
+ firstSheetName = fst $ head $ xlsx ^. xlSheets
+ -- ^ TODO: Is this still true with xlsx-0.3 or are sheets now
+ -- in alphabetical order??
+
+-- | Decode cells as tabular rows.
+-- The table is assumed to consist of all contiguous rows
+-- that fulfill the given predicate, starting with the header.
+--
+-- The predicate function is given the Xlsx @StyleSheet@ as well
+-- as a row (consisting of the row's index and the row's cells)
+-- and should return @True@ if the row is part of the table.
+--
+-- Since 0.1.1
+toTableRowsCustom :: (StyleSheet -> (Int, [(Int, Cell)]) -> Bool)
+ -- ^ Predicate for row selection
+ -> Xlsx -- ^ Xlsx Workbook
+ -> Text -- ^ Worksheet name to decode
+ -> Int -- ^ Starting row index (header row)
+ -> Maybe Tabular
+toTableRowsCustom predicate xlsx sheetName offset = do
+ styles <- parseStyleSheet (xlsx ^. xlStyles) ^? _Right
+ rows <- xlsx ^? ixSheet sheetName . wsCells . to toRows
+ decodeRows (predicate styles) offset rows
-decodeRows ss offset rs =
+decodeRows p offset rs = if null rs' then Nothing else Just $
def
& tabularHeads .~ header'
- & tabularRows .~ rows
+ & tabularRows .~ rows
where
- rs' = getCells ss offset rs
+ rs' = getCells p offset rs
header = head rs' ^. _2
- header' =
- header
- & fmap toText
- & join
+ header' = join $ map toText header
toText (i, Just (CellText t)) = [def
& tabularHeadIx .~ i
& tabularHeadLabel .~ t]
toText _ = []
- cix = fmap (view tabularHeadIx) header'
- & fromList
- rows =
- fmap rowValue (tail rs')
- rowValue rvs =
- def
- & tabularRowIx .~ (rvs ^. _1)
- & tabularRowCells .~ (rvs ^. _2 & fmap f & join)
+ ixs = map (view tabularHeadIx) header'
+ rows = map rowValue (tail rs')
+ rowValue (ix, row) = def
+ & tabularRowIx .~ ix
+ & tabularRowCells .~ insertMissingCells ixs row
where
- f (i, cell) =
- [cell | cix ^. contains i]
+ -- Insert empty cells when there is a header but no corresponding
+ -- cell. This can happen if cells have no content nor formatting
+ -- defined.
+ insertMissingCells :: [Int] -> [(Int, Maybe CellValue)] -> [Maybe CellValue]
+ insertMissingCells ixs cs = map (join . flip lookup cs) ixs
+
--- |行から値のあるセルを取り出す
-getCells :: StyleSheet -- ^スタイルシート
- -> Int -- ^開始行
- -> Rows -- ^セル行
+-- |Pick up the cells that have values from the rows
+getCells :: (Row -> Bool) -- ^ Predicate
+ -> Int -- ^ Start line number
+ -> Rows -- ^ cell rows
-> RowValues
-getCells ss i rs =
- startAt ss i rs
- & takeContiguous i
- & takeUntil ss
- & fmap rvs
- & filter vs
- where
- rvs (i, cs) =
- (i, rowValues cs)
- filter =
- Prelude.filter
- vs (i, cs) =
- any (\(_, v) -> isJust v) cs
-
-startAt :: StyleSheet -> Int -> Rows -> Rows
-startAt ss i rs =
- dropWhile f rs
- where
- f (x, _) =
- x < i
+getCells p i = filter (any (isJust . snd) . snd)
+ . (fmap . fmap) rowValues
+ . takeContiguous i
+ . takeWhile p
+ . startAt i
+
+startAt :: Int -> Rows -> Rows
+startAt i = dropWhile ((< i) . fst)
--- |指定の行から連続している行を取り出す
+-- |Take contiguous rows that start from i
takeContiguous :: Int -> Rows -> Rows
-takeContiguous i rs =
- [r | (x, r@(y, _)) <- zip [i..] rs, x == y]
+--takeContiguous i rs = [r | (x, r@(y, _)) <- zip [i..] rs, x == y]
+takeContiguous i = map snd . filter (uncurry (==) . fmap fst) . zip [i..]
--- |有効セルのすべてに枠線(Bottom側)が存在しなくなる
--- |すなわち枠囲みの欄外になるまでの行を取り出す
-takeUntil :: StyleSheet -> Rows -> Rows
-takeUntil ss rs =
- takeWhile f rs
- where
- f (i, cs) =
- or $ rowBordersHas borderBottom ss cs
+rowValues = map (fmap _cellValue)
-rowBordersHas v ss cs =
- x
- where
- x =
- fmap f cs
- f (i, cell) =
- cellHasBorder ss cell v
+-- Predicate for at least one cell having a bottom border style.
+
+-- |Take rows while at least one cell has a bottom border line.
+-- | * no bottom border line means out of table.
+borderBottomPredicate :: RowPredicate -- StyleSheet -> Row -> Bool
+borderBottomPredicate ss = or . rowBordersHas borderBottom ss . snd
+
+rowBordersHas v ss = map (cellHasBorder v ss . snd)
-rowValues cs =
- x
- where
- x =
- fmap f cs
- f (i, cell) =
- (i, cell ^. cellValue)
-cellHasBorder ss cell v =
- fromMaybe False mb
+cellHasBorder v ss cell = fromMaybe False mb
where
- b = cellBorder ss cell
- mb = borderStyleHasLine v <$> b
+ mb = borderStyleHasLine v <$> cellBorder ss cell
cellBorder :: StyleSheet -> Cell -> Maybe Border
-cellBorder ss cell =
- view cellStyle cell
- >>= pure . xf
- >>= view cellXfBorderId
- >>= pure . bd
+cellBorder ss cell = fmap xf (view cellStyle cell)
+ >>= fmap bd . view cellXfBorderId
where
xf n = (ss ^. styleSheetCellXfs) !! n
bd n = (ss ^. styleSheetBorders) !! n
-borderStyleHasLine v b =
- fromMaybe False value
+borderStyleHasLine v b = fromMaybe False value
where
- value =
- view v b
- >>= view borderStyleLine
- >>= pure . (/= LineStyleNone)
+ value = view v b >>= fmap (/= LineStyleNone) . view borderStyleLine
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xlsx-tabular-0.1.0.1/xlsx-tabular.cabal new/xlsx-tabular-0.2.2/xlsx-tabular.cabal
--- old/xlsx-tabular-0.1.0.1/xlsx-tabular.cabal 2016-06-08 15:15:12.000000000 +0200
+++ new/xlsx-tabular-0.2.2/xlsx-tabular.cabal 2017-01-16 13:43:21.000000000 +0100
@@ -1,14 +1,22 @@
name: xlsx-tabular
-version: 0.1.0.1
-synopsis: Xlsx table decode utility
-description: Please see README.md
-homepage: http://github.com/kkazuo/xlsx-tabular#readme
+version: 0.2.2
+synopsis: Xlsx table cell value extraction utility
+description:
+ .
+ Convenience utility to read xlsx tabular cells.
+ .
+ You can extract the values from xlsx files
+ table rows to JSON format by the heuristics or
+ your custom function.
+
+homepage: https://github.com/kkazuo/xlsx-tabular
+bug-reports: https://github.com/kkazuo/xlsx-tabular/issues
license: BSD3
license-file: LICENSE
-author: Kazuo Koga
-maintainer: obiwanko@me.com
-copyright: (c) 2016 Kazuo Koga
-category: Codec
+author: Koga Kazuo
participants (1)
-
root@hilbertn.suse.de