Hoe download ik alle documenten in een periode met raadsinformatie?

Zie:

Hoe kan ik de achterliggende _msearch uitlezen via Invantive UniversalSQL en dan massaal alle documenten in een periode oppakken ter voeding van Claude AI?

De payload voor POST naar _msearch op https://api.openraadsinformatie.nl/v1/elastic/*/_msearch is:

{"preference":"ResultList01"}
{
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "must": [
              {
                "bool": {
                  "must": [
                    {
                      "simple_query_string": {
                        "fields": [
                          "text",
                          "title",
                          "description",
                          "name"
                        ],
                        "default_operator": "OR",
                        "query": "\"Ede\""
                      }
                    },
                    {
                      "terms": {
                        "_index": [
                          "ori_*",
                          "osi_*",
                          "owi_*"
                        ]
                      }
                    },
                    {
                      "term": {
                        "@type": "MediaObject"
                      }
                    }
                  ],
                  "must_not": [
                    {
                      "match": {
                        "@type": "Membership"
                      }
                    }
                  ]
                }
              },
              {
                "bool": {
                  "should": [
                    {
                      "terms": {
                        "_index": [
                          "ori_ede_20250412110204"
                        ]
                      }
                    }
                  ]
                }
              },
              {
                "range": {
                  "last_discussed_at": {
                    "gte": "2025-01-01T00:00:00",
                    "lte": "2027-03-01T00:00:00",
                    "boost": 2
                  }
                }
              }
            ]
          }
        }
      ]
    }
  },
  "highlight": {
    "pre_tags": [
      "<mark>"
    ],
    "post_tags": [
      "</mark>"
    ],
    "fields": {
      "text": {},
      "title": {},
      "name": {},
      "description": {}
    },
    "fragment_size": 100,
    "number_of_fragments": 3
  },
  "size": 20,
  "_source": {
    "includes": [
      "*"
    ],
    "excludes": []
  },
  "aggs": {
    "_index": {
      "terms": {
        "field": "_index",
        "size": 500,
        "order": {
          "_count": "desc"
        }
      }
    }
  },
  "from": 0,
  "sort": [
    {
      "last_discussed_at": {
        "order": "desc"
      }
    }
  ]
}

met een antwoord zoals:

{
  "took": 13,
  "responses": [
    {
      "took": 13,
      "timed_out": false,
      "_shards": {
        "total": 331,
        "successful": 331,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": {
          "value": 10000,
          "relation": "gte"
        },
        "max_score": null,
        "hits": []
      },
      "aggregations": {
        "tags.http://www.w3.org/1999/02/22-rdf-syntax-ns#_8.https://argu.co/ns/meeting/tag.keyword": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": []
        }
      },
      "status": 200
    }
  ]
}

Een voorbeeld is:

create or replace table subscriptions@InMemoryStorage
as
select t.id
,      t.recipient_name
,      t.recipient_email
,      t.index_filter
,      t.text_filter
,      t.rel_day_start
,      t.rel_day_end
,      replace
       ( case
         when t.index_filter like 'ori%'
         then 'Gemeente {0}'
         when t.index_filter like 'osi%'
         then 'Provincie {0}'
         when t.index_filter like 'owi%'
         then 'Waterschap {0}'
         when t.index_filter like 'ggm%'
         then 'Tweede Kamer'
         else 'Alle'
         end
       , '{0}'
       , initcap(substr(t.index_filter, 5))
       )
       index_label
,      to_char(trunc(sysdateutc) - t.rel_day_start, 'dd-mm-yyyy') rel_date_start_label
,      to_char(trunc(sysdateutc) - t.rel_day_end, 'dd-mm-yyyy') rel_date_end_label
,      to_char(trunc(sysdateutc) - t.rel_day_start, 'yyyy-mm-dd"t"hh24:mi:ss"z"') rel_date_start_c
,      to_char(trunc(sysdateutc) - t.rel_day_end, 'yyyy-mm-dd"t"hh24:mi:ss"z"') rel_date_end_c
from   ( select 1 id
         ,      'John Doe' recipient_name
         ,      'john.doe@acme.eu' recipient_email
         ,      'ori_harderwijk' index_filter
         ,      null text_filter
         ,      14 rel_day_start
         ,      0 rel_day_end
         union all
         select 2 id
         ,      'John Doe' recipient_name
         ,      'john.doe@acme.eu' recipient_email
         ,      'ori_amersfoort' index_filter
         ,      null text_filter
         ,      28 rel_day_start
         ,      0 rel_day_end
         union all
         select 3 id
         ,      'John Doe' recipient_name
         ,      'john.doe@acme.eu' recipient_email
         ,      'ori_ede' index_filter
         ,      null text_filter
         ,      28 rel_day_start
         ,      0 rel_day_end
       ) t

create or replace table actions@inmemorystorage
as
select ssn.id
,      ssn.recipient_name
,      ssn.recipient_email
,      ssn.rel_date_start_label
,      ssn.rel_date_end_label
,      ssn.index_filter
,      ssn.index_label
,      'https://api.openraadsinformatie.nl/v1/elastic/' || coalesce(ssn.index_filter, '*') || '/_search' url
,      'POST' method
,      'application/json' acceptMimeType
,      'application/json' contentType
,      true diskCache
,      3600 diskCacheMaxAgeSec
,      ''
       || '{ "query":'
       || '  { "bool":'
       || '    { "must":'
       || '      [ { "bool":'
       || '          { "must":'
       || '            [ { "bool":'
       || '                { "must":'
       || '                  [ { "term": { "@type": "MediaObject" } }'
       || case
          when ssn.text_filter is not null
          then '                  , { "simple_query_string":'
               || '                   { "fields": [ "text", "title","description", "name" ]'
               || '                   , "default_operator": "OR"'
               || '                   , ' || jsonelement('query', ssn.text_filter)
               || '                   }'
               || '                 }'
          end
       || '                  ]'
       || '                , "must_not":'
       || '                  [ { "match": { "@type": "Membership" } }'
       || '                  ]'
       || '                }'
       || '              }'
       || case
          when ssn.rel_day_start is not null or ssn.rel_day_end is not null
          then '            , { "range":'
               || '             { "last_discussed_at":'
               || '               { ' || jsonelement('gte', ssn.rel_date_start_c)
               || '               , ' || jsonelement('lte', ssn.rel_date_end_c)
               || '               }'
               || '             }'
               || '           }'
          end
       || '            ]'
       || '          }'
       || '        }'
       || '      ]'
       || '    }'
       || '  }'
       || ', "size": 250'
       || ', "_source":'
       || '  { "includes": [ "*" ]'
       || '  , "excludes": []'
       || '  }'
       || ', "from": 0'
       || ', "sort":'
       || '  [ { "last_discussed_at": { "order": "desc" } }'
       || '  ]'
       || '}'
       textPayload
,      'zori' || to_char(ssn.id) cacheKey
,      true ignoreWebError
,      30000 timeoutMs
from   subscriptions@InMemoryStorage ssn

insert into smtp@mail
( toEmail
, ccEmail
, bccEmail
, subject
, body
, attachment1Contents
, attachment1Name
, attachment1MimeType
)
select atn.recipient_email
       toEmail
,      cast(null as varchar2)
       ccEmail
,      cast(null as varchar2)
       bccEmail
,      'Open Raadsinformatie van ' 
       || atn.index_label
       || ' (' 
       || atn.rel_date_start_label
       || ' - ' 
       || atn.rel_date_end_label
       || ')'
       subject       
,      'Zie bijlage voor raadsstukken van '
       || atn.index_label
       || ' voor de periode ' 
       || atn.rel_date_start_label
       || ' - ' 
       || atn.rel_date_end_label
       || '.'
       body
,      ascii_to_blob(htp.CONTENTS_CHAR)
       attachment1Contents
,      atn.index_label
       || ' van '
       || atn.rel_date_start_label
       || ' tot '
       || atn.rel_date_end_label
       || '.json' 
       attachment1Name
,      'text/plain'
       attachment1MimeType
from   ACTIONS@InMemoryStorage atn
join   HTTPDOWNLOAD@DataDictionary
       ( atn.url
       , atn.acceptMimeType
       , timeoutMs => atn.timeoutMs
       , contentType => atn.contentType
       , method => atn.method
       , diskCache => atn.diskCache
       , diskCacheMaxAgeSec => atn.diskCacheMaxAgeSec
       , textPayload => atn.textPayload
       , cacheKey => atn.cacheKey
       , ignoreWebError => atn.ignoreWebError
       ) htp

In de stuurtabel subscriptions kunnen filters op gemeente en periode vastgelegd worden.

Het verwerken duurt circa 1 seconde per subscription.