# $Id: importNOS_CurRefs.rb 7603 2022-01-08 14:40:38Z flaterco $
# Functions for importNOS.rb:
# Current reference station import

# Import current reference stations (the type "H" entries of currents.json)
# into the data_sets and constants tables.
#
# db must respond to exec(sql, params) and escape_string(str), and the result
# of exec must respond to getvalue(row, col).  (Presumably a PG::Connection;
# confirm against the caller.)
#
# Pass 1 reads the station list via readCurJson and collects per-station
# metadata (currentsmd, keyed by sid) and per-bin metadata (binsmd, keyed by
# "sid_bin").  Pass 2 fetches the harmonic constants for each station via
# getCurrentConstForSid and inserts one data_sets row plus its constants rows
# for every bin.  For stations that have no old record, a commented SQL
# "update data_sets" stanza is written to renamings-currefs.sql.
#
# Side effects:  prints diagnostics to stdout; creates renamings-currefs.sql
# in the current directory; calls exit (without touching the database) if that
# file already exists.
#
# Raises RuntimeError whenever one of the assumptions checked below is
# violated (unknown station type, metadata mismatch between bins, missing
# harcon file, unknown depth type, name already in use, phase out of range,
# no constants added, ...).
def importCurRefs(db)
  # Refuse to clobber output left over from an earlier run.
  if File.exist?("renamings-currefs.sql")
    puts "Don't want to overwrite renamings-currefs.sql; do something with it"
    exit
  end

  stateMap = getOldStateMap(db)

  # The undocumented MDAPI (1.0) geogroup children query (getting stations)
  # does not work for currents.  In lieu of that, geogroups are parsed out of
  # the old style station list web pages by parse-oldcurlists.rb and left in
  # the table currents_geogroups.  This needn't be done if there are no new
  # stations.  getCurrentsGeogroups returns an empty hash if the table is not
  # present.
  currentsGeogroups = getCurrentsGeogroups(db)

  # Report missing/extra stations.
  oldcurlistsids = Set.new(currentsGeogroups.keys)
  currentsjsonsids = Set.new

  # Pass 1.
  # There will be no harcon jsons for any sids not in currents.json, but the
  # bins are a superset of those listed in currents.json.  So first, compile
  # metadata by sid.  Note that we won't have mean directions for the bins
  # that are missing from currents.json.  (We do get azimuth.)
  currents = readCurJson()
  currentsmd = {}
  binsmd = {}
  currents.each do |csta|
    # Avoid adding code that isn't exercised.
    raise "Notices are not null anymore" unless csta["notices"].nil?
    raise "Disclaimers are not null anymore" unless csta["disclaimers"].nil?

    sid = csta["id"]
    currentsjsonsids.add(sid)

    # type is H, S, W, or Y.
    #   H = harmonic
    #   S = subordinate
    #   W = weak and variable with null offsets
    #   Y = undocumented ?  Is that like X only 1 more?
    # 2020-12 type Y has gone away.
    # Membership in a list rather than a substring test, so that nil, "", and
    # multi-letter values such as "HS" are reported as unknown types too.
    type = csta["type"]
    raise "Unknown type #{type}" unless %w[H S W].include?(type)
    next if type != "H"

    # Not expecting hits on ref stations, but may as well check.
    if isMaybeNotFreeCurrent(sid)
      print "** Skipped ", sid, " (non-US)\n"
      next
    end

    if currentsmd.include?(sid)
      # A later bin of a station already seen:  its station-level metadata
      # must agree with what was recorded for the first bin.
      # All unchecked assumptions shall be violated.
      if currentsmd[sid][:lat] != csta["lat"]
        raise "Currentsmd lat mismatch"
      elsif currentsmd[sid][:lng] != csta["lng"]
        raise "Currentsmd lng mismatch"
      elsif currentsmd[sid][:cname] != csta["name"]
        raise "Currentsmd cname mismatch"
      end
    else
      cname = csta["name"]

      # As of 2020-12, the only western Aleutians in currents.json are Rat
      # Islands, and they correctly show lng > 0.
      lat = csta["lat"]
      lng = csta["lng"]

      # A station with no old record is "new" and needs a generated name.
      oldtuple = getOldCurrentNameStateTz(db, sid)
      newStation = oldtuple.nil?
      if newStation
        begin
          statename, region, subregion = guessCurrentsGeogroups(currentsGeogroups, sid)
        rescue
          # Identify the offending station, then let the error propagate.
          print "** #{lat} #{lng} #{cname}\n"
          raise
        end
        raise "Geogroup contained a null state" if statename.nil?
        raise "Need StateCode for #{statename}" unless StateCode.include?(statename)
        gstate = StateCode[statename]

        # currents.json has no state field and geogroups states are unreliable.
        # Hunt for errors.
        state, dist = guessStateDumb(stateMap, lat, lng)
        raise "State guess is nil" if state.nil?
        if state != gstate
          print "** Possible wrong state, #{sid} #{lat} #{lng} #{cname}\n"
          print "   Geogroup state #{gstate}\n"
          print "   Guessed #{state} (d=#{"%.0e" % dist})\n"
          # In 2020-12, the guess was better than the geogroup 43/55.
          raise "No StateName for #{state}" unless StateName.include?(state)
          statename = StateName[state]
        end

        # As of 2020-12, currents from MDAPI are all assigned to 21 states and
        # Puerto Rico, notwithstanding a few WA-BC border cases.  guessTimezone
        # needs the name parameter only for FM, MH, and Pacific atolls.
        tz = guessTimezone(state, cname, lat, lng)

        currentsmd[sid] = {
          newStation: newStation,
          cname: cname,
          lat: lat,
          lng: lng,
          geogroup: [statename, region, subregion],
          state: state,
          gstate: gstate,
          tz: tz
        }
      else
        oldname, state, tz = oldtuple
        oldname = chompname(oldname)
        name = fixname(oldname)
        if name != oldname
          print "#{sid} fixing up name from data_sets_old\n"
          print "  #{oldname}\n"
          print "  #{name}\n"
        end
        currentsmd[sid] = {
          newStation: newStation,
          cname: cname,
          name: name,  # contains (depth ...) with random depth
          lat: lat,
          lng: lng,
          state: state,
          tz: tz
        }
      end
    end

    # Store the bins.
    sidp = sid + "_" + csta["currbin"].to_s
    # The offsets record is null and bogus except that it's where the mean
    # directions are reported.  Azimuth in the harcon record is close to
    # maxdir but not the same.
    offsets = csta["currentpredictionoffsets"]
    binsmd[sidp] = {
      depth: csta["depth"],             # can be null
      depthType: csta["depthType"],
      maxdir: offsets["meanFloodDir"],  # degrees true
      mindir: offsets["meanEbbDir"]     # degrees true
    }
  end

  setdif2 = oldcurlistsids - currentsjsonsids
  unless setdif2.empty?
    print "** Sids in oldcurlists that are not in currents.json:  #{setdif2.to_a}\n"
  end

  # 2020-12 patch for CFR1609.
  # 2021-12, it's still broken.
  # All three bins report the same depth of 14.  When that is (still) the
  # case, mark the depths with NaN so that pass 2 falls back to the harcon
  # depth.  The patch is skipped, rather than crashing on nil, if any of the
  # bins is absent from currents.json.
  cfr1609_bins = %w[CFR1609_3 CFR1609_15 CFR1609_25]
  if cfr1609_bins.all? { |key| binsmd.key?(key) && binsmd[key][:depth] == 14 }
    cfr1609_bins.each { |key| binsmd[key][:depth] = Float::NAN }
  end

  # Pass 2, add the stations.
  # Enumerate sids from currentsmd, bins from the harcon.
  aliases = getConstituentAliases(db)
  File.open("renamings-currefs.sql", "w") do |renamings|
    currentsmd.each_pair do |sid, md_sta|
      bins = getCurrentConstForSid(db, aliases, sid)
      if bins.nil?
        print "** #{sid} missing harcon file\n"
        raise "missing harcon file"
      elsif bins.empty?
        print "** #{sid} harcon is null; skipping it.\n"
      else
        bins.each_pair do |bin_num, harcon_bin|
          sidp = sid + "_" + bin_num.to_s
          md_bin = binsmd[sidp]  # nil for bins that are not in currents.json
          comments = nil

          # Depths have problems.  md_bin[:depth] matches the name on the web
          # site but is often missing.  harcon_bin[:depth] often seems to be of
          # a different depth type but there's no indication.  Sometimes
          # harcon_bin[:depth] is too large to be depth in feet or meters and
          # its relationship to md_bin[:depth] is mysterious.  Even when they
          # agree, they are rounded differently.

          # Nothing I can do but document the problem in comments.  What could
          # I say in a note that won't just confuse?

          md_depth = md_bin.nil? ? nil : md_bin[:depth]
          if md_depth.nil?
            depth = harcon_bin[:depth]
            comments = "No depth was provided in the stations list.  Depth is as specified in the\nharcon record.  Unknown whether it is below surface or chart datum."
          elsif md_depth.is_a?(Float) && md_depth.nan?
            # NaN is the marker set by the CFR1609 patch above.  Only Float
            # has nan?, so an Integer depth must not be asked.
            depth = harcon_bin[:depth]
            comments = "Depth provided in the stations list was duplicated for all bins.  Depth is as\nspecified in the harcon record.  Unknown whether it is below surface or chart\ndatum."
          else
            depth = md_depth
            comments = "Depth is as provided in the stations list.\n"
            case md_bin[:depthType]
            when "B"
              comments += "Depth is below chart datum."
            when "S"
              comments += "Depth is below surface."
            when "U"
              comments += "Unknown whether it is below surface or chart datum."
            else
              raise "Unknown depth type"
            end
            if (md_depth - harcon_bin[:depth]).abs > 1
              comments = append(comments, "** Depth specified in the harcon record is #{harcon_bin[:depth]} ft.")
            end
          end

          if md_sta[:newStation]
            name = generateName(md_sta[:cname], depth, md_sta[:geogroup], "Current")
            mname = db.escape_string(name)
            # Emit an SQL stanza, preceded by comments giving the context,
            # for this newly named station.
            renamings.print "-- #{sidp} #{md_sta[:lat]} #{md_sta[:lng]}\n"
            renamings.print "-- orig name: #{md_sta[:cname]}\n"
            gtext = (currentsGeogroups.include?(sid) ? "geogroup" : "geogroup (GUESSED)")
            renamings.print "-- #{gtext}: #{md_sta[:geogroup]}\n"
            if md_sta[:gstate] != md_sta[:state]
              ts1 = (md_sta[:gstate].nil? ? "null" : md_sta[:gstate])
              ts2 = (md_sta[:state].nil? ? "null" : md_sta[:state])
              renamings.print "-- Possibly wrong state:  orig geogroup said #{ts1}, guessed #{ts2}\n"
            end
            renamings.print "update data_sets\n"
            renamings.print "  set state = ",
              (md_sta[:state].nil? ? "null" : "'#{md_sta[:state]}'"), ",\n"
            renamings.print "  name = '#{mname}'\n  where name = '#{mname}';\n"
          else
            name = md_sta[:name]
            raise "Depth is nil" if depth.nil?
            depthpat = /\(depth [0-9\.]+ ft\)/
            raise "No depth in old name #{name}" if depthpat.match(name).nil?
            # Rounding and formatting here must match that in generateName.
            # Non-mutating sub:  the string in md_sta[:name] is shared by all
            # bins of the station and is left as it was.
            name = name.sub(depthpat, "(depth #{depth.round} ft)")
          end
          raise "Name in use" if nameInUse(db, name)

          mindir = maxdir = nil
          unless md_bin.nil?
            mindir = md_bin[:mindir]
            maxdir = md_bin[:maxdir]
          end
          if maxdir.nil?
            maxdir = harcon_bin[:azimuth]
            comments = append(comments, "No mean flood direction provided.  Used azimuth from harcon record.")
          end

          # Add data set record.  datumkind should be left null.
          units = "knots"    # Future support SI units
          legal = nil        # Saved for future use
          notes = nil        # Saved for future use
          xfields = "Credit:NOAA data processed by David Flater for XTide\n https://flaterco.com/xtide/"
          res = db.exec("insert into data_sets (name, original_name, station_id_context, station_id, lat, lng, timezone, state, country, units, source, legalese, notes, comments, restriction, xfields, min_dir, max_dir, meridian, datum) values ($1, $2, 'NOS', $3, $4, $5, $6, $7, 'USA', $8, 'CO-OPS Metadata API', $9, $10, $11, 'Public domain', $12, $13, $14, $15, $16) returning index",
                        [name, md_sta[:cname], sidp, md_sta[:lat], md_sta[:lng],
                         md_sta[:tz], md_sta[:state], units, legal, notes,
                         comments, xfields, mindir, maxdir, '0:00',
                         harcon_bin[:datum]])
          # The returned index keys the constants rows to this data set.
          index = res.getvalue(0, 0).to_i

          # Add constants.
          const_added = 0
          harcon_bin[:constants].each_pair do |constname, const|
            # De-aliasing of constname was already done.
            amp, phase = const[:amp], const[:phase]
            next if amp < 0.00005      # skip negligible amplitudes
            phase = 0 if phase == 360  # This happens
            raise "Phase out of range" if phase < 0 || phase >= 360 # Also happens...
            db.exec("insert into constants values ($1, $2, $3, $4)",
                    [index, constname, phase, amp])
            const_added += 1
          end
          if const_added.zero?
            print "** No harmonic constants added for #{sidp} #{name}\n"
            raise "Broken station"
          end
        end
      end
    end
  end
end
