Incorrect character set of Fortnox SIE (Swedish audit file) files

Tt seems there is a little mis-handling of charset provided by fortnox

create or replace table sie_file@inmemorystorage
as 
select * 
from   Fortnox.Sie.SieFileByType(4, 27)
select * from sie_file@inmemorystorage

This is a proposed -beta- approach for parsing the SIE. It runs way faster then via the regular enpoint at the fortnox API. The fortnox SIE enpoint is very fast, few ms to get a full year.

The script then parses the SIE nodes to create @inmemory tables with all information in the file. The transaction parser process approx 1000 transaction in 15 seconds. Other parsers (balances, accounts etc..) are almost instant.

--------------------------------------------------------------------------------
-- SIE 4 (Transactions) → fast split with csvtable → set-based parsing
-- 
--------------------------------------------------------------------------------

create or replace table sie_file@inmemorystorage
( FinancialYearId number
, Result          clob
);

-- === B) Call Fortnox for each year we want the data =============
insert into sie_file@inmemorystorage (FinancialYearId, Result)
select 27, Result
from   Fortnox.Sie.SieFileByType(4, 27);

insert into sie_file@inmemorystorage (FinancialYearId, Result)
select 26, Result
from   Fortnox.Sie.SieFileByType(4, 26);

select * from sie_file@inmemorystorage 

-- === C) Fast split to one row per line (keeps FY id) ========================
create or replace table sie_lines@inmemorystorage 
as
select f.FinancialYearId,
       t.line
from   sie_file@inmemorystorage f
join   csvtable(
         passing replace(f.Result, chr(13), '')   -- normalize CRLF -> LF
         row delimiter chr(10)                    -- split on LF
         column delimiter chr(9)                  -- dummy delimiter
         columns line varchar2 position next
       ) t
where  t.line is not null
and    length(trim(t.line)) > 0;

--------------------------------------------------------------------------------
-- TARGET TABLES
--------------------------------------------------------------------------------
create or replace table sie_vouchers@inmemorystorage
( series        varchar2(20),
  vouchernumber varchar2(50),
  verdate       varchar2(8),
  vertext       varchar2(2000),
  regdate       varchar2(8) );

create or replace table sie_trans@inmemorystorage
( series        varchar2(20),
  vouchernumber varchar2(50),
  verdate       varchar2(8),        -- from header
  vertext       varchar2(2000),     -- from header
  account       varchar2(20),
  objects_raw   varchar2(4000),
  amount        number,
  transdate     varchar2(8),
  transtext     varchar2(2000),
  quantity      number,
  sign          varchar2(200) );

--------------------------------------------------------------------------------
-- #TRANS ONE-PASS PARSER (ordered loop; no analytics; no heavy joins)
--------------------------------------------------------------------------------
-- #RTRANS = supplementary (an added line). Per the spec, it must be immediately followed by an identical #TRANS for backward compatibility.
--          If you handle #RTRANS, you should ignore that following #TRANS to avoid double-counting.
--          If you don’t handle #RTRANS, you can simply ignore the #RTRANS row and keep the following #TRANS (what your parser currently does). 
-- #BTRANS = removed (cancelled) transaction line. It’s there for history/audit; importers may ignore #BTRANS for normal ledger totals. 
declare
  cur_fyid    number;
  cur_fy      number;
  cur_series  varchar2;
  cur_vnr     varchar2;
  cur_verdate varchar2;
  cur_vertext varchar2;
  cur_regdate varchar2;
  trans_count number := 0;
  total_trans number;
  overall_start date := sysdate;
  batch_start   date;
  secs          number;
  cum_secs      number;
  started_batch number := 0; -- 0=false, 1=true
begin
  select count(*) into total_trans
  from sie_lines@inmemorystorage
  where substr(line,1,6) = '#TRANS';
  dbms_output.put_line('SIE parse started at ' || to_char(overall_start,'YYYY-MM-DD HH24:MI:SS') ||
                       ' | total transactions=' || to_char(total_trans));
  for r in (
    select FinancialYearId, FinancialYear, rowid$, line
    from   sie_lines@inmemorystorage
    where  substr(line,1,4) = '#VER'
        or substr(line,1,6) = '#TRANS'
    order  by FinancialYearId, rowid$
  ) loop
    if substr(r.line,1,4) = '#VER' then
      cur_fyid    := r.FinancialYearId;
      cur_fy      := r.FinancialYear;
      cur_series  := regexp_substr(r.line, '(?<=^#VER[ \t]+)\S+', 1, 1, 'c');
      cur_vnr     := regexp_substr(r.line, '(?<=^#VER[ \t]+\S+[ \t]+)\S+', 1, 1, 'c');
      cur_verdate := regexp_substr(r.line, '(?<=^#VER[ \t]+\S+[ \t]+\S+[ \t]+)\d{8}', 1, 1, 'c');
      cur_vertext := regexp_substr(r.line, '(?<=^#VER[ \t]+\S+[ \t]+\S+[ \t]+\d{8}[ \t]+")[^"]*(?=")', 1, 1, 'c');
      cur_regdate := regexp_substr(r.line, '(?<=^#VER[ \t]+\S+[ \t]+\S+[ \t]+\d{8}[ \t]+".*"[ \t]+)\d{8}', 1, 1, 'c');
      insert into sie_vouchers@inmemorystorage
        (FinancialYearId, FinancialYear, series, vouchernumber, verdate, vertext, regdate)
      values
        (cur_fyid, cur_fy, cur_series, cur_vnr, cur_verdate, cur_vertext, cur_regdate);
    elsif substr(r.line,1,6) = '#TRANS' then
      if started_batch = 0 then
        batch_start := sysdate;
        started_batch := 1;
      end if;
      insert into sie_trans@inmemorystorage
        (FinancialYearId, FinancialYear, series, vouchernumber, verdate, vertext,
         account, objects_raw, amount, transdate, transtext, quantity, sign)
      values
        (
          cur_fyid,
          cur_fy,
          cur_series,
          cur_vnr,
          cur_verdate,
          cur_vertext,
          regexp_substr(r.line, '(?<=^#TRANS[ \t]+)\d+', 1, 1, 'c'),
          regexp_substr(r.line, '(?<=^#TRANS[ \t]+\d+[ \t]+)\{[^}]*\}', 1, 1, 'c'),
          to_number(replace(regexp_substr(r.line, '(?<=^#TRANS[ \t]+\d+[ \t]+\{[^}]*\}[ \t]+)-?\d+(?:[.,]\d+)?', 1, 1, 'c'), ',', '.')),
          regexp_substr(r.line, '(?<=^#TRANS[ \t]+\d+[ \t]+\{[^}]*\}[ \t]+[^\s]+[ \t]+)\d{8}', 1, 1, 'c'),
          regexp_substr(r.line, '(?<=\s")[^"]*(?=")', 1, 1, 'c'),
          to_number(regexp_substr(r.line, '(?<=\s"[^"]*"\s)-?\d+(?:[.,]\d+)?', 1, 1, 'c')),
          regexp_substr(r.line, '(?<=\s"[^"]*".*")[^"]*(?=")', 1, 1, 'c')
        );
      trans_count := trans_count + 1;
      if mod(trans_count, 1000) = 0 or trans_count = total_trans then
        secs := (sysdate - batch_start) * 86400;
        cum_secs := (sysdate - overall_start) * 86400;
        dbms_output.put_line('Processed ' || to_char(trans_count) || '/' || to_char(total_trans) ||
                             ' transactions; last 1000 in ' || to_char(round(secs,2)) || 's; elapsed ' ||
                             to_char(round(cum_secs,2)) || 's' ||
                             ' | FY=' || to_char(cur_fy) ||
                             ' | Series=' || cur_series ||
                             ' | Voucher=' || cur_vnr);
        batch_start := sysdate;
      end if;
    end if;
  end loop;
  cum_secs := (sysdate - overall_start) * 86400;
  dbms_output.put_line('Completed ' || to_char(trans_count) || '/' || to_char(total_trans) ||
                       ' transactions in ' || to_char(round(cum_secs,2)) || 's');
  dbms_output.put_line('Total execution time: ' || to_char(round(cum_secs,2)) || 's');
end;


--------------------------------------------------------------------------------
-- OPTIONAL: OTHER TAGS (fast, 5-arg regex)
--------------------------------------------------------------------------------

-- #KONTO
create or replace table sie_accounts@inmemorystorage as
select
  regexp_substr(line, '(?<=^#KONTO[ \t]+)\S+', 1, 1, 'c')               as account,
  regexp_substr(line, '(?<=^#KONTO[ \t]+\S+[ \t]+")[^"]*(?=")', 1, 1, 'c') as name
from sie_lines@inmemorystorage
where line like '#KONTO %';

-- #SRU (expects "#SRU <acct> <code>")
create or replace table sie_sru@inmemorystorage as
select
  regexp_substr(line, '(?<=^#SRU[ \t]+)\S+', 1, 1, 'c')  as account,
  regexp_substr(line, '(?<=^#SRU[ \t]+\S+[ \t]+)\S+', 1, 1, 'c') as sru
from sie_lines@inmemorystorage
where line like '#SRU %';

-- #PSALDO 
-- robust parsing (works for 2024/2025)
-- If you carry FinancialYearId on sie_lines, keep it in the SELECT list too.
create or replace table sie_psaldo@inmemorystorage as
select
  /* DIM (allow negatives) */
  to_number(regexp_substr(line, '(?<=^#PSALDO[ \t]+)-?\d+', 1, 1, 'c'))                                   as dim,
  /* PERIOD (keep YYYYMM) */
  substr(regexp_substr(line, '(?<=^#PSALDO[ \t]+-?\d+[ \t]+)\d{4,8}', 1, 1, 'c'), 1, 6)                    as period_yyyymm,
  /* ACCOUNT (token before '{'; NULL on totals) */
  regexp_substr(line, '(?<=^#PSALDO[ \t]+-?\d+[ \t]+\d{4,8}[ \t]+)\S+(?=[ \t]+\{)', 1, 1, 'c')             as account,
  /* OBJECT LIST */
  regexp_substr(line, '\{[^}]*\}', 1, 1, 'c')                                                              as objects_raw,
  /* AMOUNT & QUANTITY */
  to_number(regexp_substr(replace(line, ',', '.'), '(?<=\}[ \t]+)-?\d+(?:\.\d+)?', 1, 1, 'c'))             as amount,
  to_number(regexp_substr(replace(line, ',', '.'), '(?<=\}[ \t]+\S+[ \t]+)-?\d+(?:\.\d+)?', 1, 1, 'c'))    as quantity,
  /* Simple & reliable flag */
  case when regexp_substr(line, '(?<=^#PSALDO[ \t]+-?\d+[ \t]+\d{4,8}[ \t]+)\S+(?=[ \t]+\{)', 1, 1, 'c') is null
       then 1 else 0 end                                                                                   as is_total_row
from sie_lines@inmemorystorage
where substr(line,1,8) = '#PSALDO ';


-- #IB / #UB
create or replace table sie_ibub@inmemorystorage as
select
  case when line like '#IB %' then 'IB' else 'UB' end                                                     as typ,
  to_number(regexp_substr(line, '(?<=^#(IB|UB)[ \t]+)\d+', 1, 1, 'c'))                                    as dim,
  regexp_substr(line, '(?<=^#(IB|UB)[ \t]+\d+[ \t]+)\S+', 1, 1, 'c')                                      as account,
  to_number(regexp_substr(replace(line, ',', '.'), '(?<=^#(IB|UB)[ \t]+\d+[ \t]+\S+[ \t]+)-?\d+(?:\.\d+)?', 1, 1, 'c')) as amount,
  to_number(regexp_substr(replace(line, ',', '.'), '(?<=^#(IB|UB)[ \t]+\d+[ \t]+\S+[ \t]+[^\s]+[ \t]+)-?\d+(?:\.\d+)?', 1, 1, 'c'))         as quantity
from sie_lines@inmemorystorage
where line like '#IB %' or line like '#UB %';

-- #DIM
create or replace table sie_dimensions@inmemorystorage as
select
  to_number(regexp_substr(line, '(?<=^#DIM[ \t]+)\d+', 1, 1, 'c'))    as dimension,
  regexp_substr(line, '(?<=^#DIM[ \t]+\d+[ \t]+")[^"]*(?=")', 1, 1, 'c') as name
from sie_lines@inmemorystorage
where line like '#DIM %';

-- #OBJEKT
create or replace table sie_objects@inmemorystorage as
select
  to_number(regexp_substr(line, '(?<=^#OBJEKT[ \t]+)\d+', 1, 1, 'c'))           as dimension,
  regexp_substr(line, '(?<=^#OBJEKT[ \t]+\d+[ \t]+")[^"]*(?=")', 1, 1, 'c')     as object_no,
  regexp_substr(line, '(?<=^#OBJEKT[ \t]+\d+[ \t]+"[^"]*"[ \t]+")[^"]*(?=")', 1, 1, 'c') as object_name
from sie_lines@inmemorystorage
where line like '#OBJEKT %';

--------------------------------------------------------------------------------
-- QUICK CHECKS
--------------------------------------------------------------------------------
select series, count(*) as voucher_count
from sie_vouchers@inmemorystorage
group by series
order by voucher_count desc;

select series, count(*) as trans_lines
from sie_trans@inmemorystorage
group by series
order by trans_lines desc;

select * from sie_vouchers@inmemorystorage

select * from sie_accounts@inmemorystorage

select * from sie_ibub@inmemorystorage

select * from sie_psaldo@inmemorystorage

Starting release 25.0.160, the missing character set conversion from code page 437 of the SIE (Swedish audit file) format as returned from Fortnox to Unicode will be included.

This ensures that all special characters such as a-umlaut are shown correctly.

Dit topic is 3 dagen na het laatste antwoord automatisch gesloten. Nieuwe antwoorden zijn niet meer toegestaan.