mz-language-tags.scm
(module mz-language-tags mzscheme
        (require (planet "bot.scm" ("oesterholt" "webbot.plt" 1 0)))
        (require (lib "getinfo.ss" "setup"))
        (require (lib "string.ss" "srfi" "13"))
        (require (lib "list.ss"))
        (provide get-tag-for-lang
                 get-lang-for-tag
                 get-type-for-tag
                 get-language-list
                 get-language-types
                 get-description-list-for-type
                 get-tag-list-for-type
                 get-language-tag-list
                 language-tags-from-iana?
                 mz-language-tags-documentation
                 )
        
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;; Support functions
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        
        (define (substr s from . _to)
          (let ((L (string-length s)))
            (let ((to (if (null? _to) 
                          L
                          (if (>= (car _to) L)
                              L
                              (car _to)))))
              (if (< to from)
                  (let ((H to))
                    (set! to from)
                    (set! from H)))
              (if (>= from L)
                  ""
                  (substring s from to)))))
        
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;; Module variables
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        
        (define IANA-TAG-REGISTRY "http://www.iana.org/assignments/language-subtag-registry")
        (define LANGUAGES (list))
        
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;; Exported functions
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        
        
        (spod-module-def)
        (spod-module-add (s= "mz-language-tags - iana language tags"))
        
        (spod-module-add 
         (sp "This module provides an interface to standardized language tags as described in RFC4646."
             "The module is initialized from a IANA file that has been included with the distribution."
             "However, it also starts a thread to download the IANA file from the internet and refresh."
             "the contents of the languages. If it fails, it fails silently and relies on the provided "
             "tags with the distribution, however, it will keep trying to load the IANA file every 60 "
             "seconds."))
        
        (spod-module-add
         (s== "Provided functions")
         (s=== (s% "(get-tag-for-lang language:string?) --> tag:symbol?"))
         (sp "Returns a language tag for a given language description (e.g. dutch).")
         (sp "If it doesn't find the language, it returns #f"))
        (define (get-tag-for-lang lang)
          (letrec ((f (lambda (L)
                        (if (null? L)
                            #f
                            (if (string-ci=? (caar L) lang)
                                (cadar L)
                                (f (cdr L)))))))
            (f LANGUAGES)))
        
        
        (spod-module-add
         (s=== (s% "(get-lang-for-tag tag:symbol?) --> languages:list of string?"))
         (sp "Returns the language descriptions valid for a given language tag. If it " 
             "doesn't find the tag, it returns #f."))
        (define (get-lang-for-tag tag)
          (letrec ((f (lambda (L)
                        (if (null? L)
                            (list)
                            (if (eq? (cadar L) tag)
                                (cons (caar L) (f (cdr L)))
                                (f (cdr L)))))))
            (let ((R (f LANGUAGES)))
              (if (null? R)
                  #f
                  R))))
        
        (spod-module-add
         (s=== (s% "(get-type-for-tag tag:symbol?) --> type:symbol?"))
         (sp "Returns the type of tag, e.g. 'language, 'region or 'script, or #f if tag hasn't been found."))
        (define (get-type-for-tag tag)
          (letrec ((f (lambda (L)
                        (if (null? L)
                            (list)
                            (if (eq? (cadar L) tag)
                                (caddar L)
                                (f (cdr L)))))))
            (let ((R (f LANGUAGES)))
              (if (null? R)
                  #f
                  R))))
        
        (spod-module-add
         (s=== (s% "(get-language-types) --> types:list of symbol?"))
         (sp "Returns the list of all IANA types."))
        (define (get-language-types)
          (let ((T (make-hash-table)))
            (for-each (lambda (e)
                        (hash-table-put! T (caddr e) (caddr e)))
                      LANGUAGES)
            (hash-table-map T (lambda (k v) v))))
        
        (spod-module-add
         (s=== (s% "(get-language-list-for-type) --> languages:list of string?"))
         (sp "Returns the list of all IANA descriptions for type language/script/region, whatever."))
        (define (get-description-list-for-type t)
          (sort (map car (filter (lambda (e)
                                   (eq? (caddr e) t))
                                 LANGUAGES)) string-ci<?))
        
        (spod-module-add
         (s=== (s% "(get-language-list) --> languages:list of string?"))
         (sp "Returns the list of all IANA languages, sorted."))
        (define (get-language-list)
          (get-description-list-for-type 'language))
        
        (spod-module-add
         (s=== (s% "(get-tag-list-for-type) --> tags:list of symbol?"))
         (sp "Returns the list of all IANA tags for type language/script/region, whatever."))
        (define (get-tag-list-for-type t)
          (map cadr (filter (lambda (e) (eq? (caddr e) t)) LANGUAGES)))

        (spod-module-add
         (s=== (s% "(get-language-tag-list) --> tags:list of symbol?"))
         (sp "Returns the list of all IANA tags for type language."))
        (define (get-language-tag-list)
          (get-tag-list-for-type 'language))
        
        (spod-module-add
         (s=== (s% "(language-tags-from-iana?) --> boolean?"))
         (sp "Returns #t, if the language list has been refreshed from IANA."))
        (define language-tags-from-iana? (lambda () #f))

        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;; Module documentation
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        
        (define %module-doc (spod-module-doc))
        (define (mz-language-tags-documentation)
          %module-doc)
        
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        ;; Module initialization
        ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        
        (define (parse-languages file fh)
          (letrec ((R (lambda (fh tag langs type line-no)
                        (let ((l (read-line fh)))
                          (if (eof-object? l)
                              '()
                              (cond
                               ((string=? (substr l 0 2) "%%")
                                (cond
                                 ((and (not (null? langs)) (not (eq? tag #f)))
                                  (append (map (lambda (lang) (list lang tag type)) langs)
                                          (R fh #f '() 'nil (+ line-no 1))))
                                 ((and (not (eq? type 'nil)) (or (null? langs) (eq? tag #f)))
                                  (error (format "mz-language-tags: Unexpected: file format for file ~a at line ~a"
                                                 file line-no)))
                                 (else
                                  (R fh #f '() 'nil (+ line-no 1)))))
                               ((string-ci=? (substr l 0 5) "Type:")
                                (let ((val (string->symbol (string-trim-both (substr l 5)))))
                                  (R fh tag langs val (+ line-no 1))))
                               ((string-ci=? (substr l 0 7) "Subtag:")
                                (let ((val (string->symbol (string-trim-both (substr l 7)))))
                                  (R fh val langs type (+ line-no 1))))
                               ((string-ci=? (substr l 0 4) "Tag:")
                                (let ((val (string->symbol (string-trim-both (substr l 4)))))
                                  (R fh val langs type (+ line-no 1))))
                               ((string-ci=? (substr l 0 12) "Description:")
                                (let ((val (string-trim-both (substr l 12))))
                                  (R fh tag (cons val langs) type (+ line-no 1))))
                               (else
                                (R fh tag langs type (+ line-no 1)))))))))
            (R fh #f '() 'nil 1)))

        
        (let ((dirs (find-relevant-directories '(mz-language-tags) )))
          (if (null? dirs)
              (display "mz-language-tags: WARNING: cannot initialize language tags from distribution.\n")
              (let ((file (build-path (car dirs) "language-subtag-registry.dat")))
                (let ((fh (open-input-file file)))
                  (set! LANGUAGES (parse-languages (path->string file) fh))
                  (close-input-port fh)))))
        
        (thread (lambda ()
                  (let ((bot (form-data)))
                    (letrec ((F (lambda ()
                                  (let ((fh (-> bot get IANA-TAG-REGISTRY)))
                                    (if (eq? fh 'no-contact)
                                        (begin
                                          (sleep 60)
                                          (F))
                                        (begin
                                          (set! LANGUAGES (parse-languages IANA-TAG-REGISTRY fh))
                                          (close-input-port fh)
                                          (set! language-tags-from-iana? (lambda () #t))))))))
                      (F)
                      #t))))

        )