I'm reading a couple of NOAA directories on the web, just to find out what the latest data is. But that is taking forever, about 500 seconds (!), whereas once I find the file I need, I can read its 28756136 bytes in 6 seconds. The directory listings are really short. I'm trying to figure out why reading them is taking so long.
#lang racket
(require net/url html-parsing)
; Common URL prefix for the blend product directories; nbm-webdir appends
; an 8-character date string and a trailing "/" to it.
(define nbm-base "https://nomads.ncep.noaa.gov/pub/data/nccf/com/blend/prod/blend.")
; Build the URL of the directory for one day.
;   ymd8 : string, the date suffix appended to nbm-base (e.g. "20230731")
; Example:
;   (nbm-webdir "20230731")
;   => "https://nomads.ncep.noaa.gov/pub/data/nccf/com/blend/prod/blend.20230731/"
(define (nbm-webdir ymd8)
  (let ([dated-prefix (string-append nbm-base ymd8)])
    (string-append dated-prefix "/")))
; Build the URL of the "text" subdirectory for one day and hour.
;   ymd8 : string, date suffix passed through to nbm-webdir
;   hour : string, inserted as a path segment (e.g. "18")
; Example:
;   (nbm-textdir "20230731" "18")
;   => "https://nomads.ncep.noaa.gov/pub/data/nccf/com/blend/prod/blend.20230731/18/text/"
(define (nbm-textdir ymd8 hour)
  (let ([day-dir (nbm-webdir ymd8)])
    (string-append day-dir hour "/text/")))
; Build the URL of one product file inside the text directory.
;   ymd8         : string, date suffix passed through to nbm-textdir
;   hour         : string, used both as a path segment and in the file name
;   product-code : symbol accepted by product-letter (e.g. 'hourly)
; Example:
;   (nbm-product "20230731" "18" 'hourly)
;   => "https://nomads.ncep.noaa.gov/pub/data/nccf/com/blend/prod/blend.20230731/18/text/blend_nbhtx.t18z"
(define (nbm-product ymd8 hour product-code)
  (let* ([dir (nbm-textdir ymd8 hour)]
         [letter (product-letter product-code)]
         [file-name (string-append "blend_nb" letter "tx.t" hour "z")])
    (string-append dir file-name)))
; Map a product-code symbol to the one-letter string used in file names.
;   'hourly => "h", 'short => "s", 'extended => "e",
;   'super-extended => "x", 'probabilistic-extended => "p"
; Any other value raises an error whose message names the offending code.
(define (product-letter product-code)
  (case product-code
    [(hourly) "h"]
    [(short) "s"]
    [(extended) "e"]
    [(super-extended) "x"]
    [(probabilistic-extended) "p"]
    [else (error (format "unknown product-code: ~a" product-code))]))
; Return the text-directory URL for ymd8/hour with one more "/" appended.
; product-code is accepted but not used.
; NOTE(review): the name looks like a transposition of nbm-product, and since
; nbm-textdir already ends in "/", the result ends in "//" -- possibly a
; leftover draft; confirm whether anything still calls this.
(define (nmb-product ymd8 hour product-code)
  (let ([text-dir (nbm-textdir ymd8 hour)])
    (string-append text-dir "/")))
; Fetch the day directory listing for ymd8 and parse it into an xexp.
;   ymd8 : string, date suffix passed to nbm-webdir
; Returns whatever html->xexp produces for the page's HTML.
(define (get-webdir-xexp ymd8)
  ; call/input-url closes the port once port->string returns, so the
  ; connection is not left open after the fetch.
  ; The full response is read into a string first so the HTML parser works on
  ; in-memory text instead of pulling from the network port as it parses.
  ; NOTE(review): parsing straight off the port is the suspected cause of the
  ; ~500 s timings -- re-time to confirm.
  (define html
    (call/input-url (string->url (nbm-webdir ymd8))
                    get-pure-port
                    port->string))
  (html->xexp html))
; Fetch the text directory listing for ymd8/hour and parse it into an xexp.
;   ymd8 : string, date suffix passed to nbm-textdir
;   hour : string, hour path segment passed to nbm-textdir
; Returns whatever html->xexp produces for the page's HTML.
(define (get-textdir-xexp ymd8 hour)
  ; call/input-url closes the port once port->string returns, so the
  ; connection is not left open after the fetch.
  ; The full response is read into a string first so the HTML parser works on
  ; in-memory text instead of pulling from the network port as it parses.
  ; NOTE(review): parsing straight off the port is the suspected cause of the
  ; ~500 s timings -- re-time to confirm.
  (define html
    (call/input-url (string->url (nbm-textdir ymd8 hour))
                    get-pure-port
                    port->string))
  (html->xexp html))
; Download one product file and return its contents as a byte string.
;   ymd8         : string, date suffix passed to nbm-product
;   hour         : string, hour passed to nbm-product
;   product-code : symbol accepted by product-letter (e.g. 'hourly)
(define (get-product-bytes ymd8 hour product-code)
  ; call/input-url closes the port after port->bytes has drained it, so the
  ; connection is released instead of being left for the GC/custodian.
  (call/input-url (string->url (nbm-product ymd8 hour product-code))
                  get-pure-port
                  port->bytes))
; Timing runs: each fetch is wrapped in `time`, which prints the
; cpu/real/gc times for the call and passes its result through to the define.
; All three use the fixed date "20230731"; the last two use hour "18".
; Day directory listing, parsed to an xexp.
(define x-webdir-xexp (time (get-webdir-xexp "20230731")))
; Text directory listing for hour 18, parsed to an xexp.
(define x-textdir-xexp (time (get-textdir-xexp "20230731" "18")))
; Raw bytes of the 'hourly product file for hour 18.
(define x-product-bytes (time (get-product-bytes "20230731" "18" 'hourly)))
cpu time: 67555 real time: 500331 gc time: 93
cpu time: 64171 real time: 500488 gc time: 81
cpu time: 2867 real time: 6364 gc time: 77