;;; gle.tdl

;;; -*- Mode: tdl; Coding: utf-8; -*-

;;;
;;; Copyright (c) 2000 -- 2018 Dan Flickinger (danf@stanford.edu);
;;; copyright (c) 2009 -- 2018 Stephan Oepen (oe@ifi.uio.no);
;;; see `LICENSE' for conditions.
;;;

;;;
;;; generic lexical entries, i.e. entries that are not activated directly by
;;; orthography.  all generic entries are distinguished from native lexical
;;; entries by virtue of their [ ONSET unk_onset ].  generics are sub-divided
;;; into two sub-classes, named (somewhat unfortunately) `unknown' generics 
;;; and `generic' generics (that is `unk' and `gen', respectively).
;;;
;;; the former are designed for unknown words and activated on the basis of
;;; PoS tags, the latter correspond to named entities that are recognized by
;;; string-level properties during chart mapping (the equivalent of what in
;;; the YY tradition used to be `ersatz' lexical entries).  see the comments
;;; in `tmr.tdl' (token mapping) and `lfr.tdl' (lexical filtering) for further
;;; background.
;;;
;;; _fix_me_
;;; i believe nothing stands in our way of giving these entries a `meaningful'
;;; orthography now, e.g. something like "_generic_trans_verb_bse_", or so, for
;;; interactive testing (including the LKB).
;;;
;;; furthermore, i suspect we can now ditch the older `gle' entries, used for
;;; generation only, and instead use some of the entries below (those with a
;;; singleton RELS list, fully instantiated PRED, and underspecified CARG) in
;;; *generic-lexical-entries*.                                 (22-jan-09; oe)
;;;


;;;
;;; the first group of `unknown' generic entries, each for a specific PoS tag
;;;

;; Unknown-word entry selected when a token's first +TNT tag is "VB".
generic_trans_verb_bse := v_np*_bse-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "VB" ] ] ] > ],
    ORTH < "_generic_vb_" > ].

;; Unknown-word entry selected when a token's first +TNT tag is "VBZ".
generic_trans_verb_pres3sg := v_np*_pr-3s-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "VBZ" ] ] ] > ],
    ORTH < "_generic_vbz_" > ].

;; Unknown-word entry selected when a token's first +TNT tag is "VBP".
generic_trans_verb_presn3sg := v_np*_pr-n3s-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "VBP" ] ] ] > ],
    ORTH < "_generic_vbp_" > ].

;; Unknown-word entry selected when a token's first +TNT tag is "VBD".
generic_trans_verb_past := v_np*_pa-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "VBD" ] ] ] > ],
    ORTH < "_generic_vbd_" > ].

;; Unknown-word entry selected when a token's first +TNT tag is "VBG".
generic_trans_verb_prp := v_np*_prp-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "VBG" ] ] ] > ],
    ORTH < "_generic_vbg_" > ].

;; Unknown-word entry selected when a token's first +TNT tag is "VBN"; the
;; same tag also selects generic_trans_verb_pas.
generic_trans_verb_psp := v_np*_psp-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "VBN" ] ] ] > ],
    ORTH < "_generic_vbn_" > ].

;; Second entry for first +TNT tag "VBN": same tag and ORTH as
;; generic_trans_verb_psp, but with lexical type v_-_pas-unk_le.
generic_trans_verb_pas := v_-_pas-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "VBN" ] ] ] > ],
    ORTH < "_generic_vbn_" > ].

;; Unknown-word entry selected when a token's first +TNT tag is "FW"; it uses
;; the same lexical type (n_-_mc-unk_le) as generic_mass_count_noun.
generic_mass_noun := n_-_mc-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "FW" ] ] ] > ],
    ORTH < "_generic_fw_" > ].

;; Unknown-word entry selected when a token's first +TNT tag is "NN".
generic_mass_count_noun := n_-_mc-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "NN" ] ] ] > ],
    ORTH < "_generic_nn_" > ].

;; This is a robustness measure to accommodate a TnT quirk (see tmr/pos.tdl)
;;
generic_mass_count_noun_cctag := n_-_mc-unk_le &
  [ ;; same type and ORTH as generic_mass_count_noun, but keyed on tag "CC"
    TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "CC" ] ] ] > ],
    ORTH < "_generic_nn_" > ].

;; Unknown-word entry selected when a token's first +TNT tag is "NNS".
generic_pl_noun := n_-_c-pl-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "NNS" ] ] ] > ],
    ORTH < "_generic_nns_" > ].

;; Unknown-word entry selected when a token's first +TNT tag is "NNP".
genericname := n_-_pn-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "NNP" ] ] ] > ],
    ORTH < "_generic_nnp_" > ].

;; Entry selected when a token's first +TNT tag is "GQ".  Unlike its
;; neighbours, its lexical type (n_-_gq_le) carries no `-unk' suffix.
generic_quoted := n_-_gq_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "GQ" ] ] ] > ],
    ORTH < "_generic_quoted_" > ].

;; DPF 2017-02-01 - The tagger is not always right with the NNPS tag, as with
;; |[[Franglais]], for example, ...|  So ignore that plural, and let the parser
;; decide on inflection.
#|
genericname_pl := n_-_pn-pl-unk_le &
  [ ORTH < "_generic_nnps_" >,
    TOKENS.+LIST < [ +TNT.+TAGS.FIRST "NNPS" ] > ].
|#
;; Active definition for first +TNT tag "NNPS": it uses the same lexical type
;; as genericname (n_-_pn-unk_le) rather than the n_-_pn-pl-unk_le type of the
;; commented-out version above.
genericname_pl := n_-_pn-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "NNPS" ] ] ] > ],
    ORTH < "_generic_nnps_" > ].

;; Unknown-word entry selected when a token's first +TNT tag is "JJ".
generic_adj := aj_-_i-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "JJ" ] ] ] > ],
    ORTH < "_generic_jj_" > ].

;; Unknown-word entry selected when a token's first +TNT tag is "JJR".
generic_adj_compar := aj_-_i-cmp-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "JJR" ] ] ] > ],
    ORTH < "_generic_jjr_" > ].

;; Unknown-word entry selected when a token's first +TNT tag is "JJS".
generic_adj_superl := aj_-_i-sup-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "JJS" ] ] ] > ],
    ORTH < "_generic_jjs_" > ].

;; Unknown-word entry selected when a token's first +TNT tag is "CD".
generic_number := aj_-_i-crd-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "CD" ] ] ] > ],
    ORTH < "_generic_cd_" > ].

;; Unknown-word entry selected when a token's first +TNT tag is "RB".
generic_adverb := av_-_i-unk_le &
  [ TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "RB" ] ] ] > ],
    ORTH < "_generic_rb_" > ].

;; DPF 2012-12-21 - For e.g. |zounds|
;;
generic_uh_disc_adv := av_-_dc-like-unk_le &
  [ ;; keyed on first +TNT tag "UH"
    TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "UH" ] ] ] > ],
    ORTH < "_generic_uh_" > ].

;; DPF 2016-11-23 - For stranded punctuation mark tokens
;;
generic_punct := av_-_dc-like-unk_le &
  [ ;; keyed on first +TNT tag "."; same lexical type as generic_uh_disc_adv
    TOKENS [ +LIST < [ +TNT [ +TAGS [ FIRST "." ] ] ] > ],
    ORTH < "_generic_punct_" > ].


;;;
;;; and the second group of `generic' named entities, activated by their +CLASS
;;; values, which get set in token mapping.  to avoid overlap with the unknown
;;; lexical entries, the token mapping rules make sure to `empty out' the PoS
;;; information whenever a named entity token is created.
;;;

;; Named-entity entry selected by token +CLASS proper_ne.
generic_proper_ne := n_-_pn-gen_le &
  [ TOKENS [ +LIST < [ +CLASS proper_ne ] > ],
    ORTH < "_generic_proper_ne_" > ].

;; Named-entity entry selected by token +CLASS plur_ne.
generic_pl_noun_ne := n_-_c-pl-gen_le &
  [ TOKENS [ +LIST < [ +CLASS plur_ne ] > ],
    ORTH < "_generic_plur_ne_" > ].

;; Named-entity entry selected by token +CLASS plur_apos_ne; it shares its
;; lexical type (n_-_c-pl-gen_le) with generic_pl_noun_ne.
generic_pl_apos_noun_ne := n_-_c-pl-gen_le &
  [ TOKENS [ +LIST < [ +CLASS plur_apos_ne ] > ],
    ORTH < "_generic_plur_apos_ne_" > ].

;; Named-entity entry selected by token +CLASS date_ne.
generic_date_ne := n_-_day-crd-gen_le &
  [ TOKENS [ +LIST < [ +CLASS date_ne ] > ],
    ORTH < "_generic_date_ne_" > ].

;; Named-entity entry for +CLASS dom_card_ne tokens stamped +ONSET c-onset;
;; generic_dom_card_voc_ne is the v-onset counterpart.
;; NOTE(review): unlike generic_year_ne / generic_card_ne, no SYNSEM.PHON.ONSET
;; value is set here -- confirm that is intended.
generic_dom_card_ne := n_-_pn-dom-gen_le &
  [ TOKENS [ +LIST < [ +ONSET c-onset,
                       +CLASS dom_card_ne ] > ],
    ORTH < "_generic_dom_card_ne_" > ].

;; Named-entity entry for +CLASS dom_card_ne tokens stamped +ONSET v-onset;
;; same lexical type and ORTH as generic_dom_card_ne.
generic_dom_card_voc_ne := n_-_pn-dom-gen_le &
  [ TOKENS [ +LIST < [ +ONSET v-onset,
                       +CLASS dom_card_ne ] > ],
    ORTH < "_generic_dom_card_ne_" > ].

;; Named-entity entry selected by token +CLASS dom_ord_ne.
generic_dom_ord_ne := n_-_pn-dom-o-gen_le &
  [ TOKENS [ +LIST < [ +CLASS dom_ord_ne ] > ],
    ORTH < "_generic_dom_ord_ne_" > ].

;; Entry of type n_-_pn-dom-e-gen_le for c-onset tokens.  It is selected by
;; the same +CLASS (dom_card_ne) as generic_dom_card_ne; there is no separate
;; `euro' token class in this file.
generic_dom_euro_ne := n_-_pn-dom-e-gen_le &
  [ TOKENS [ +LIST < [ +ONSET c-onset,
                       +CLASS dom_card_ne ] > ],
    ORTH < "_generic_dom_euro_ne_" > ].

;; v-onset counterpart of generic_dom_euro_ne: same lexical type, ORTH and
;; +CLASS (dom_card_ne), but for tokens stamped +ONSET v-onset.
generic_dom_euro_voc_ne := n_-_pn-dom-e-gen_le &
  [ TOKENS [ +LIST < [ +ONSET v-onset,
                       +CLASS dom_card_ne ] > ],
    ORTH < "_generic_dom_euro_ne_" > ].

;; Named-entity entry for +CLASS year_ne tokens stamped +ONSET c-onset; the
;; entry itself is [ SYNSEM.PHON.ONSET con ].
generic_year_ne := n_-_pn-yoc-gen_le &
  [ TOKENS [ +LIST < [ +ONSET c-onset,
                       +CLASS year_ne ] > ],
    SYNSEM [ PHON [ ONSET con ] ],
    ORTH < "_generic_year_ne_" > ].

;; Named-entity entry for +CLASS year_ne tokens stamped +ONSET v-onset; the
;; entry itself is [ SYNSEM.PHON.ONSET voc ].
generic_year_voc_ne := n_-_pn-yoc-gen_le &
  [ TOKENS [ +LIST < [ +ONSET v-onset,
                       +CLASS year_ne ] > ],
    SYNSEM [ PHON [ ONSET voc ] ],
    ORTH < "_generic_year_ne_" > ].

;; Named-entity entry selected by token +CLASS meas_ne (the same class also
;; selects generic_meas_np_np_ne).
generic_meas_np_ne := n_-_meas-gen_le &
  [ TOKENS [ +LIST < [ +CLASS meas_ne ] > ],
    ORTH < "_generic_meas_np_ne_" > ].

;; DPF 2018-03-30 - This type's complement is not constrained enough, and the
;; entry may not be needed anyway, so let's seek to get rid of it.  FIX.
generic_meas_np_np_ne := n_np_meas-gen_le &
  [ ;; same +CLASS (meas_ne) as generic_meas_np_ne, different lexical type
    TOKENS [ +LIST < [ +CLASS meas_ne ] > ],
    ORTH < "_generic_meas_np_np_ne_" > ].

;; Named-entity entry selected by token +CLASS meas_noun_ne.
generic_meas_n_ne := n_-_meas-n-gen_le &
  [ TOKENS [ +LIST < [ +CLASS meas_noun_ne ] > ],
    ORTH < "_generic_meas_noun_ne_" > ].

;; Named-entity entry selected by token +CLASS time_ne.
generic_time_noun_ne := n_-_pn-hour-gen_le &
  [ TOKENS [ +LIST < [ +CLASS time_ne ] > ],
    ORTH < "_generic_time_ne_" > ].

;; DPF 2018-04-12 - For robustness, let's omit [PHON.ONSET con] here for now, 
;; since several of the token-mapping rules want to remain underspecified about
;; onset (related to the underspecified +CLASS value that they assign, one of
;; whose subtypes is card_ne), and we don't want those rules to each give rise 
;; to two generic lexical card entries (one con and one voc), so we stamp their 
;; outputs as +ONSET c-onset.  FIX someday.
;; DPF 2020-05-11 - Re 2018-04-12: We also don't want to leave ONSET unspecified
;; since we don't want robust "a" for |a apple| to combine for |a 10% drop|.  So
;; let's add the ONSET constraint, and see how it goes.
;;
generic_card_ne := aj_-_i-crd-gen_le &
  [ ;; c-onset tokens pair with PHON.ONSET con (cf. generic_card_voc_ne)
    TOKENS [ +LIST < [ +ONSET c-onset,
                       +CLASS card_ne ] > ],
    SYNSEM [ PHON [ ONSET con ] ],
    ORTH < "_generic_card_ne_" > ].

;; v-onset counterpart of generic_card_ne: same lexical type, ORTH and +CLASS
;; (card_ne), with [ SYNSEM.PHON.ONSET voc ] for +ONSET v-onset tokens.
generic_card_voc_ne := aj_-_i-crd-gen_le &
  [ TOKENS [ +LIST < [ +ONSET v-onset,
                       +CLASS card_ne ] > ],
    SYNSEM [ PHON [ ONSET voc ] ],
    ORTH < "_generic_card_ne_" > ].

;; Named-entity entry selected by token +CLASS ord_ne.
generic_ord_ne := aj_-_i-ord-gen_le &
  [ TOKENS [ +LIST < [ +CLASS ord_ne ] > ],
    ORTH < "_generic_ord_ne_" > ].

;; Named-entity entry selected by token +CLASS frct_ne (note the spelling:
;; the class is `frct_ne', while the entry and ORTH use `fract').
generic_fract_ne := aj_-_i-frct-gen_le &
  [ TOKENS [ +LIST < [ +CLASS frct_ne ] > ],
    ORTH < "_generic_fract_ne_" > ].

;; DPF 2012-09-19 - Added entry for generic sub-one decimals in measure
;; phrases, as in |the price rose 0.3 point| and |the 2.0 release| (while
;; still blocking |the two release|).
;;
generic_decimal_ne := aj_-_i-one-gen_le &
  [ ;; selected by token +CLASS decimal_ne
    TOKENS [ +LIST < [ +CLASS decimal_ne ] > ],
    ORTH < "_generic_decimal_ne_" > ].