#!/usr/bin/env ruby
# frozen_string_literal: true
#
# xml_to_sqlite.rb
#
# Streams XML from STDIN, extracts <note> ... </note> blocks, and writes them into a SQLite DB.
# Tries to use Nokogiri if available; falls back to REXML (stdlib) for parsing each note chunk.
#
# Usage:
#   cat notes.xml | ruby xml_to_sqlite.rb
#
require 'sqlite3'

# Try to load Nokogiri; fall back to REXML if not available.
nokogiri_available = begin
  require 'nokogiri'
  true
rescue LoadError
  false
end

require 'rexml/document' unless nokogiri_available
require 'stringio'

DB_FILE = 'notes.db'

# --- Initialize DB ---
db = SQLite3::Database.new(DB_FILE)
db.execute_batch <<~SQL
  PRAGMA foreign_keys = ON;

  CREATE TABLE IF NOT EXISTS notes (
    id INTEGER PRIMARY KEY,
    lat REAL,
    lon REAL,
    created_at TEXT,
    closed_at TEXT
  );

  CREATE TABLE IF NOT EXISTS comments (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    note_id INTEGER,
    action TEXT,
    timestamp TEXT,
    uid INTEGER,
    user TEXT,
    text TEXT,
    FOREIGN KEY(note_id) REFERENCES notes(id)
  );
SQL

insert_note_stmt = db.prepare(
  "INSERT OR REPLACE INTO notes (id, lat, lon, created_at, closed_at) VALUES (?, ?, ?, ?, ?)"
)
insert_comment_stmt = db.prepare(
  "INSERT INTO comments (note_id, action, timestamp, uid, user, text) VALUES (?, ?, ?, ?, ?, ?)"
)

# Helper: parse a single <note>...</note> XML string and insert into DB
def process_note_xml(xml_str, nokogiri_available, insert_note_stmt, insert_comment_stmt)
  if nokogiri_available
    doc = Nokogiri::XML(xml_str) { |cfg| cfg.noblanks } # remove blank nodes
    note_el = doc.at_xpath('/note')
    return unless note_el
    note_id = note_el['id']
    lat = note_el['lat']
    lon = note_el['lon']
    created_at = note_el['created_at']
    closed_at = note_el['closed_at']
    insert_note_stmt.execute(note_id, lat, lon, created_at, closed_at)
    # iterate comments
    note_el.xpath('comment').each do |c|
      action = c['action']
      timestamp = c['timestamp']
      uid = c['uid']
      user = c['user']
      text = c.text ? c.text.strip : nil
      insert_comment_stmt.execute(note_id, action, timestamp, uid, user, text)
    end
  else
    # Use REXML
    # REXML::Document expects a complete XML document; we wrap the note in a dummy root.
    wrapped = "<root>#{xml_str}</root>"
    doc = REXML::Document.new(wrapped)
    note_el = doc.root.elements['note']
    return unless note_el
    note_id = note_el.attributes['id']
    lat = note_el.attributes['lat']
    lon = note_el.attributes['lon']
    created_at = note_el.attributes['created_at']
    closed_at = note_el.attributes['closed_at']
    @count += 1
    puts "#{@count} Inserting note #{note_id}" if @count % 100 == 0
    insert_note_stmt.execute(note_id, lat, lon, created_at, closed_at)
    note_el.elements.each('comment') do |c|
      action = c.attributes['action']
      timestamp = c.attributes['timestamp']
      uid = c.attributes['uid']
      user = c.attributes['user']
      # REXML::Element#text may be nil; join all text nodes to preserve newlines
      text = (c.text || '').strip
      insert_comment_stmt.execute(note_id, action, timestamp, uid, user, text)
    end
  end
rescue => e
  warn "Warning: failed to parse/insert a note block: #{e.class}: #{e.message}"
end

@count = 0
# --- Stream from STDIN, split by closing </note> tags ---
buffer = ''.dup
note_close_tag = '</note>'
# We'll support optional XML declaration or whitespace before notes, and multiple notes per input.

# Read in binary to preserve any encodings; we'll rely on the XML parser to handle encoding declarations.
STDIN.binmode
while chunk = STDIN.read(16 * 1024)
  buffer << chunk
  # process all complete </note> occurrences in buffer
  loop do
    idx = buffer.index(note_close_tag)
    break unless idx
    end_idx = idx + note_close_tag.length
    note_chunk = buffer.slice!(0...end_idx)
    # Trim leading whitespace/newlines so the chunk is a single <note>...</note>
    note_chunk.strip!
    next if note_chunk.empty?
    # Some inputs may contain multiple top-level nodes or stray characters; ensure we start with <note
    start_idx = note_chunk.index('<note')
    if start_idx && start_idx > 0
      # discard any leading garbage before <note
      note_chunk = note_chunk[start_idx..-1]
    end
    # Process this note chunk
    process_note_xml(note_chunk, nokogiri_available, insert_note_stmt, insert_comment_stmt)
  end
end

# After EOF, there might be remaining buffered data that doesn't end with </note>.
# Try to see if it contains a final complete note (rare), otherwise ignore.
if !buffer.strip.empty?
  # attempt to extract a trailing <note>...</note> if present
  if m = buffer.match(/(<note\b.*<\/note>)/m)
    note_chunk = m[1].strip
    process_note_xml(note_chunk, nokogiri_available, insert_note_stmt, insert_comment_stmt)
  else
    warn "Warning: leftover data after EOF that does not contain a complete </note> block; ignoring."
  end
end

insert_note_stmt.close
insert_comment_stmt.close
db.close

puts "Done at #{Time.now.to_s}. Data saved to #{DB_FILE} (parser: #{nokogiri_available ? 'nokogiri' : 'rexml'})"
