BPFK Section: PEG Morphology Algorithm: Difference between revisions

From Lojban
Jump to navigation Jump to search
m (Text replace - "jbocre: d" to "d")
m (jmina lo kleste sinxa)
 
(19 intermediate revisions by 3 users not shown)
Line 1: Line 1:
<pre>
; This is a Parsing Expression Grammar for the morphology of Lojban.%%%
; See [http://www.pdos.lcs.mit.edu/~baford/packrat/] %%%
; %%%
; All rules have the form %%%
; %%%
; name <- peg-expression%%%
; %%%
; which means that the grammatical construct "name" is parsed using%%%
; "peg-expression".  %%%
; %%%
; 1)  Concatenation is expressed by juxtaposition with no operator symbol.%%%
; 2)  / represents *ORDERED* alternation (choice).  If the first%%%
;    option succeeds, the others will never be checked.%%%
; 3)  ? indicates that the element to the left is optional.%%%
; 4)  * represents zero-or-more (optional) repetition of the construct to the left.%%%
; 5)  + represents one-or-more repetition of the construct to the left.%%%
; 6)  () serves to indicate the grouping of the other operators.%%%
; 7)  & indicates that the element to the right must follow (but the%%%
;    marked element itself does not absorb anything).%%%
; 8)  ! indicates that the element to the right must not follow (the%%%
;    marked element itself does not absorb anything).%%%
; 9)  . represents any character.%%%
; 10) ' ' or " " represents a literal string.%%%
; 11) [] represents a character class.  %%%
;%%%
; Repetitions grab as much as they can.%%%
;%%%
;%%%
; --- GRAMMAR ---%%%
; This grammar classifies words by their morphological class (cmene,%%%
; gismu, lujvo, fuhivla, cmavo, and non-lojban-word). %%%
; %%%
; The final section sorts cmavo into grammatical classes (A, BAI, BAhE, ..., ZOhU).%%%
;%%%
; mi'e ((xorxes))%%%


<code>
;-------------------------------------------------------------------%%%
 
; This is a Parsing Expression Grammar for the morphology of Lojban.<br />
 
; See [http://www.pdos.lcs.mit.edu/~baford/packrat/] <br />
 
; <br />
 
; All rules have the form <br />
 
; <br />
 
; name <- peg-expression<br />
 
; <br />
 
; which means that the grammatical construct "name" is parsed using<br />
 
; "peg-expression".  <br />
 
; <br />
 
; 1)  Concatenation is expressed by juxtaposition with no operator symbol.<br />
 
; 2)  / represents *ORDERED* alternation (choice).  If the first<br />
 
;    option succeeds, the others will never be checked.<br />
 
; 3)  ? indicates that the element to the left is optional.<br />
 
; 4)  * represents optional repetition of the construct to the left.<br />
 
; 5)  + represents one-or-more repetition of the construct to the left.<br />
 
; 6)  () serves to indicate the grouping of the other operators.<br />
 
; 7)  &amp; indicates that the element to the right must follow (but the<br />
 
;    marked element itself does not absorb anything).<br />
 
; 8)  ! indicates that the element to the right must not follow (the<br />
 
;    marked element itself does not absorb anything).<br />
 
; 9)  . represents any character.<br />
 
; 10) ' ' or " " represents a literal string.<br />
 
; 11) [[jbocre: | ]] represents a character class.  <br />
 
;<br />
 
; Repetitions grab as much as they can.<br />
 
;<br />
 
;<br />
 
; --- GRAMMAR ---<br />
 
; This grammar classifies words by their morphological class (cmene,<br />
 
; gismu, lujvo, fuhivla, cmavo, and non-lojban-word). <br />
 
; <br />
 
;The final section sorts cmavo into grammatical classes (A, BAI, BAhE, ..., ZOhU).<br />
 
;<br />
 
; mi'e [[User:xorxes|xorxes]]<br />
 
;-------------------------------------------------------------------<br />


words <- pause? (word pause?)*
words <- pause? (word pause?)*
Line 86: Line 50:
cmene <- jbocme / zifcme
cmene <- jbocme / zifcme


zifcme <- !h (nucleus / glide / h / consonant !pause / digit)* consonant &amp;pause
zifcme <- !h (nucleus / glide / h / consonant !pause / digit)* consonant &pause


jbocme <- &amp;zifcme (any-syllable / digit)* &amp;pause  
jbocme <- &zifcme (any-syllable / digit)* &pause  


;cmene <- !h &amp;consonant-final coda? (any-syllable / digit)* &amp;pause
;cmene <- !h &consonant-final coda? (any-syllable / digit)* &pause


;consonant-final <- (non-space &amp;non-space)* consonant &amp;pause
;consonant-final <- (non-space &non-space)* consonant &pause


;cmene <- !h cmene-syllable* &amp;consonant coda? consonantal-syllable* onset &amp;pause
;cmene <- !h cmene-syllable* &consonant coda? consonantal-syllable* onset &pause


;cmene-syllable <- !doi-la-lai-lahi coda? consonantal-syllable* onset nucleus / digit
;cmene-syllable <- !doi-la-lai-lahi coda? consonantal-syllable* onset nucleus / digit
Line 102: Line 66:
;-------------------------------------------------------------------
;-------------------------------------------------------------------


cmavo <- !cmene !CVCy-lujvo cmavo-form &amp;post-word  
cmavo <- !cmene !CVCy-lujvo cmavo-form &post-word  


CVCy-lujvo <- CVC-rafsi y h? initial-rafsi* brivla-core / stressed-CVC-rafsi y short-final-rafsi
CVCy-lujvo <- CVC-rafsi y h? initial-rafsi* brivla-core / stressed-CVC-rafsi y short-final-rafsi
Line 116: Line 80:
stressed-initial-rafsi <- stressed-extended-rafsi / stressed-y-rafsi / stressed-y-less-rafsi
stressed-initial-rafsi <- stressed-extended-rafsi / stressed-y-rafsi / stressed-y-less-rafsi


initial-rafsi <- extended-rafsi / y-rafsi / !any-extended-rafsi y-less-rafsi
initial-rafsi <- extended-rafsi / y-rafsi / !any-extended-rafsi y-less-rafsi !any-extended-rafsi


any-extended-rafsi <- fuhivla / extended-rafsi / stressed-extended-rafsi
any-extended-rafsi <- fuhivla / extended-rafsi / stressed-extended-rafsi
Line 128: Line 92:
extended-rafsi <- brivla-rafsi / fuhivla-rafsi
extended-rafsi <- brivla-rafsi / fuhivla-rafsi


stressed-brivla-rafsi <- &amp;unstressed-syllable brivla-head stressed-syllable h y
stressed-brivla-rafsi <- &unstressed-syllable brivla-head stressed-syllable h y


brivla-rafsi <- &amp;(syllable consonantal-syllable* syllable) brivla-head h y h?
brivla-rafsi <- &(syllable consonantal-syllable* syllable) brivla-head h y h?


stressed-fuhivla-rafsi <- fuhivla-head stressed-syllable &amp;consonant onset y  
stressed-fuhivla-rafsi <- fuhivla-head stressed-syllable !h onset y  


fuhivla-rafsi <- &amp;unstressed-syllable fuhivla-head &amp;consonant onset y h?
fuhivla-rafsi <- &unstressed-syllable fuhivla-head !h onset y h?


fuhivla-head <- !rafsi-string brivla-head
fuhivla-head <- !rafsi-string brivla-head


brivla-head <- !cmavo !slinkuhi !h &amp;onset unstressed-syllable*
brivla-head <- !cmavo !slinkuhi !h &onset unstressed-syllable*


slinkuhi <- consonant rafsi-string
slinkuhi <- !rafsi-string consonant rafsi-string  


rafsi-string <- y-less-rafsi* (gismu / CVV-final-rafsi / stressed-y-less-rafsi short-final-rafsi / y-rafsi / stressed-y-rafsi / stressed-y-less-rafsi? initial-pair y)
rafsi-string <- y-less-rafsi* (gismu / CVV-final-rafsi / stressed-y-less-rafsi short-final-rafsi / y-rafsi / stressed-y-rafsi / stressed-y-less-rafsi? initial-pair y / hy-rafsi / stressed-hy-rafsi)


;-------------------------------------------------------------------
;-------------------------------------------------------------------


gismu <- (initial-pair stressed-vowel / consonant stressed-vowel consonant) &amp;final-syllable consonant vowel &amp;post-word
gismu <- (initial-pair stressed-vowel / consonant stressed-vowel consonant) &final-syllable consonant vowel &post-word
 
CVV-final-rafsi <- consonant stressed-vowel h &final-syllable vowel &post-word


CVV-final-rafsi <- consonant stressed-vowel h &amp;final-syllable vowel &amp;post-word
short-final-rafsi <- &final-syllable (consonant diphthong / initial-pair vowel) &post-word


short-final-rafsi <- &amp;final-syllable (consonant diphthong / initial-pair vowel) &amp;post-word
stressed-hy-rafsi <- (long-rafsi stressed-vowel / stressed-CCV-rafsi / stressed-CVV-rafsi) h y


stressed-y-rafsi <- (stressed-long-rafsi / stressed-CVC-rafsi) y
stressed-y-rafsi <- (stressed-long-rafsi / stressed-CVC-rafsi) y
Line 163: Line 129:


stressed-CVV-rafsi <- consonant (unstressed-vowel h stressed-vowel / stressed-diphthong) r-hyphen?  
stressed-CVV-rafsi <- consonant (unstressed-vowel h stressed-vowel / stressed-diphthong) r-hyphen?  
hy-rafsi <- (long-rafsi vowel / CCV-rafsi / CVV-rafsi) h y h?


y-rafsi <- (long-rafsi / CVC-rafsi) y h?
y-rafsi <- (long-rafsi / CVC-rafsi) y h?


y-less-rafsi <- !y-rafsi (CVC-rafsi !y / CCV-rafsi / CVV-rafsi) !any-extended-rafsi
y-less-rafsi <- !y-rafsi !stressed-y-rafsi !hy-rafsi !stressed-hy-rafsi (CVC-rafsi / CCV-rafsi / CVV-rafsi) !h


long-rafsi <- initial-pair unstressed-vowel consonant / consonant unstressed-vowel consonant consonant  
long-rafsi <- initial-pair unstressed-vowel consonant / consonant unstressed-vowel consonant consonant  
Line 176: Line 145:
CVV-rafsi <- consonant (unstressed-vowel h unstressed-vowel / unstressed-diphthong) r-hyphen?
CVV-rafsi <- consonant (unstressed-vowel h unstressed-vowel / unstressed-diphthong) r-hyphen?


r-hyphen <- r &amp;consonant / n &amp;r
r-hyphen <- r &consonant / n &r


;-------------------------------------------------------------------
;-------------------------------------------------------------------


final-syllable <-  onset !y !stressed nucleus !cmene &amp;post-word
final-syllable <-  onset !y !stressed nucleus !cmene &post-word


stressed-syllable <- &amp;stressed syllable / syllable &amp;stress
stressed-syllable <- &stressed syllable / syllable &stress


stressed-diphthong <- &amp;stressed diphthong / diphthong &amp;stress
stressed-diphthong <- &stressed diphthong / diphthong &stress


stressed-vowel <- &amp;stressed vowel / vowel &amp;stress
stressed-vowel <- &stressed vowel / vowel &stress


unstressed-syllable <- !stressed syllable !stress / consonantal-syllable
unstressed-syllable <- !stressed syllable !stress / consonantal-syllable
Line 194: Line 163:
unstressed-vowel <- !stressed vowel !stress
unstressed-vowel <- !stressed vowel !stress


stress <- consonant* y? syllable pause
stress <- consonant* h? y? syllable pause


stressed <- onset comma* [[jbocre: AEIOU|AEIOU]]
stressed <- onset comma* [AEIOU]


any-syllable <- onset nucleus coda? / consonantal-syllable  
any-syllable <- onset nucleus coda? / consonantal-syllable  
Line 202: Line 171:
syllable <- onset !y nucleus coda?
syllable <- onset !y nucleus coda?


consonantal-syllable <- consonant syllabic &amp;(consonantal-syllable / onset) (consonant &amp;spaces)?
consonantal-syllable <- consonant &syllabic coda


coda <- !any-syllable consonant &amp;any-syllable / syllabic? consonant? &amp;pause  
coda <- !any-syllable consonant &any-syllable / syllabic? consonant? &pause  


onset <-  h / consonant? glide / initial
onset <-  h / glide / initial


nucleus <- vowel / diphthong / y !nucleus
nucleus <- vowel / diphthong / y !nucleus
Line 212: Line 181:
;-----------------------------------------------------------------
;-----------------------------------------------------------------


glide <- (i / u) &amp;nucleus !glide
glide <- (i / u) &nucleus


diphthong <- (a i / a u / e i / o i) !nucleus !glide
diphthong <- (a i !i / a u !u / e i !i / o i !i) !nucleus


vowel <- (a / e / i / o / u) !nucleus
vowel <- (a / e / i / o / u) !nucleus


a <- comma* [[jbocre: aA|aA]]  
a <- comma* [aA]  


e <- comma* [[eE|eE]]  
e <- comma* [eE]  


i <- comma* [[jbocre: iI|iI]]  
i <- comma* [iI]  


o <- comma* [[jbocre: oO|oO]]  
o <- comma* [oO]  


u <- comma* [[jbocre: uU|uU]]  
u <- comma* [uU]  


y <- comma* [[jbocre: yY|yY]]  
y <- comma* [yY]  


;-------------------------------------------------------------------
;-------------------------------------------------------------------
Line 234: Line 203:
cluster <- consonant consonant+
cluster <- consonant consonant+


initial-pair <- &amp;initial consonant consonant !consonant
initial-pair <- &initial consonant consonant !consonant


initial <- (affricate / sibilant? other? liquid?) !consonant !glide
initial <- (affricate / sibilant? other? liquid?) !consonant !glide
Line 254: Line 223:
unvoiced <- c / f / k / p / s / t / x
unvoiced <- c / f / k / p / s / t / x


l <- comma* [[jbocre: lL|lL]] !h !l
l <- comma* [lL] !h !glide !l


m <- comma* [[jbocre: mM|mM]] !h !m !z
m <- comma* [mM] !h !glide !m !z


n <- comma* [[jbocre: nN|nN]] !h !n !affricate
n <- comma* [nN] !h !glide !n !affricate


r <- comma* [[jbocre: rR|rR]] !h !r
r <- comma* [rR] !h !glide !r


b <- comma* [[bB|bB]] !h !b !unvoiced
b <- comma* [bB] !h !glide !b !unvoiced


d <- comma* [[dD|dD]] !h !d !unvoiced
d <- comma* [dD] !h !glide !d !unvoiced


g <- comma* [[jbocre: gG|gG]] !h !g !unvoiced
g <- comma* [gG] !h !glide !g !unvoiced


v <- comma* [[jbocre: vV|vV]] !h !v !unvoiced
v <- comma* [vV] !h !glide !v !unvoiced


j <- comma* [[jbocre: jJ|jJ]] !h !j !z !unvoiced
j <- comma* [jJ] !h !glide !j !z !unvoiced


z <- comma* [[jbocre: zZ|zZ]] !h !z !j !unvoiced
z <- comma* [zZ] !h !glide !z !j !unvoiced


s <- comma* [[jbocre: sS|sS]] !h !s !c !voiced
s <- comma* [sS] !h !glide !s !c !voiced


c <- comma* [[cC|cC]] !h !c !s !x !voiced
c <- comma* [cC] !h !glide !c !s !x !voiced


x <- comma* [[jbocre: xX|xX]] !h !x !c !k !voiced
x <- comma* [xX] !h !glide !x !c !k !voiced


k <- comma* [[jbocre: kK|kK]] !h !k !x !voiced
k <- comma* [kK] !h !glide !k !x !voiced


f <- comma* [[jbocre: fF|fF]] !h !f !voiced
f <- comma* [fF] !h !glide !f !voiced


p <- comma* [[jbocre: pP|pP]] !h !p !voiced
p <- comma* [pP] !h !glide !p !voiced


t <- comma* [[jbocre: tT|tT]] !h !t !voiced
t <- comma* [tT] !h !glide !t !voiced


h <- comma* [[jbocre: 'h|'h]] &amp;nucleus
h <- comma* ['h] &nucleus


;-------------------------------------------------------------------
;-------------------------------------------------------------------


digit <- comma* [[jbocre: 0123456789|0123456789]] !h !nucleus
digit <- comma* [0123456789] !h !nucleus


post-word <- pause / !nucleus lojban-word
post-word <- pause / !nucleus lojban-word
Line 300: Line 269:
EOF <- comma* !.
EOF <- comma* !.


comma <- [[jbocre: ,|,]]
comma <- [,]


non-lojban-word <- !lojban-word non-space+
non-lojban-word <- !lojban-word non-space+
Line 306: Line 275:
NORATS non-space <- !space-char .
NORATS non-space <- !space-char .


NORATS space-char <- [[jbocre: .\t\n\r?!\u0020|.\t\n\r?!\u0020]]
NORATS space-char <- [.\t\n\r?!\u0020]


;-------------------------------------------------------------------
;-------------------------------------------------------------------
Line 318: Line 287:
;-------------------------------------------------------------------
;-------------------------------------------------------------------


A <- &amp;cmavo ( a / e / j i / o / u ) &amp;post-word
A <- &cmavo ( a / e / j i / o / u ) &post-word


BAI <- &amp;cmavo ( d u h o / s i h u / z a u / k i h i / d u h i / c u h u / t u h i / t i h u / d i h o / j i h u / r i h a / n i h i / m u h i / k i h u / v a h u / k o i / c a h i / t a h i / p u h e / j a h i / k a i / b a i / f i h e / d e h i / c i h o / m a u / m u h u / r i h i / r a h i / k a h a / p a h u / p a h a / l e h a / k u h u / t a i / b a u / m a h i / c i h e / f a u / p o h i / c a u / m a h e / c i h u / r a h a / p u h a / l i h e / l a h u / b a h i / k a h i / s a u / f a h e / b e h i / t i h i / j a h e / g a h a / v a h o / j i h o / m e h a / d o h e / j i h e / p i h o / g a u / z u h e / m e h e / r a i ) &amp;post-word
BAI <- &cmavo ( d u h o / s i h u / z a u / k i h i / d u h i / c u h u / t u h i / t i h u / d i h o / j i h u / r i h a / n i h i / m u h i / k i h u / v a h u / k o i / c a h i / t a h i / p u h e / j a h i / k a i / b a i / f i h e / d e h i / c i h o / m a u / m u h u / r i h i / r a h i / k a h a / p a h u / p a h a / l e h a / k u h u / t a i / b a u / m a h i / c i h e / f a u / p o h i / c a u / m a h e / c i h u / r a h a / p u h a / l i h e / l a h u / b a h i / k a h i / s a u / f a h e / b e h i / t i h i / j a h e / g a h a / v a h o / j i h o / m e h a / d o h e / j i h e / p i h o / g a u / z u h e / m e h e / r a i ) &post-word


BAhE <- &amp;cmavo ( b a h e / z a h e ) &amp;post-word
BAhE <- &cmavo ( b a h e / z a h e ) &post-word


BE <- &amp;cmavo ( b e ) &amp;post-word
BE <- &cmavo ( b e ) &post-word


BEI <- &amp;cmavo ( b e i ) &amp;post-word
BEI <- &cmavo ( b e i ) &post-word


BEhO <- &amp;cmavo ( b e h o ) &amp;post-word
BEhO <- &cmavo ( b e h o ) &post-word


BIhE <- &amp;cmavo ( b i h e ) &amp;post-word
BIhE <- &cmavo ( b i h e ) &post-word


BIhI <- &amp;cmavo ( m i h i / b i h o / b i h i ) &amp;post-word
BIhI <- &cmavo ( m i h i / b i h o / b i h i ) &post-word


BO <- &amp;cmavo ( b o ) &amp;post-word
BO <- &cmavo ( b o ) &post-word


BOI <- &amp;cmavo ( b o i ) &amp;post-word
BOI <- &cmavo ( b o i ) &post-word


BU <- &amp;cmavo ( b u ) &amp;post-word
BU <- &cmavo ( b u ) &post-word


BY <- ybu / &amp;cmavo ( j o h o / r u h o / g e h o / j e h o / l o h a / n a h a / s e h e / t o h a / g a h e / y h y /  b y / c y / d y / f y / g y / j y / k y / l y / m y / n y / p y / r y / s y / t y / v y / x y / z y ) &amp;post-word
BY <- ybu / &cmavo ( j o h o / r u h o / g e h o / j e h o / l o h a / n a h a / s e h e / t o h a / g a h e / y h y /  b y / c y / d y / f y / g y / j y / k y / l y / m y / n y / p y / r y / s y / t y / v y / x y / z y ) &post-word


CAhA <- &amp;cmavo ( c a h a / p u h i / n u h o / k a h e ) &amp;post-word
CAhA <- &cmavo ( c a h a / p u h i / n u h o / k a h e ) &post-word


CAI <- &amp;cmavo ( p e i / c a i / c u h i / s a i / r u h e ) &amp;post-word
CAI <- &cmavo ( p e i / c a i / c u h i / s a i / r u h e ) &post-word


CEI <- &amp;cmavo ( c e i ) &amp;post-word
CEI <- &cmavo ( c e i ) &post-word


CEhE <- &amp;cmavo ( c e h e ) &amp;post-word
CEhE <- &cmavo ( c e h e ) &post-word


CO <- &amp;cmavo ( c o ) &amp;post-word
CO <- &cmavo ( c o ) &post-word


COI <- &amp;cmavo ( j u h i / c o i / f i h i / t a h a / m u h o / f e h o / c o h o / p e h u / k e h o / n u h e / r e h i / b e h e / j e h e / m i h e / k i h e / v i h o ) &amp;post-word
COI <- &cmavo ( j u h i / c o i / f i h i / t a h a / m u h o / f e h o / c o h o / p e h u / k e h o / n u h e / r e h i / b e h e / j e h e / m i h e / k i h e / v i h o ) &post-word


CU <- &amp;cmavo ( c u ) &amp;post-word
CU <- &cmavo ( c u ) &post-word


CUhE <- &amp;cmavo ( c u h e / n a u ) &amp;post-word
CUhE <- &cmavo ( c u h e / n a u ) &post-word


DAhO <- &amp;cmavo ( d a h o ) &amp;post-word
DAhO <- &cmavo ( d a h o ) &post-word


DOI <- &amp;cmavo ( d o i ) &amp;post-word
DOI <- &cmavo ( d o i ) &post-word


DOhU <- &amp;cmavo ( d o h u ) &amp;post-word
DOhU <- &cmavo ( d o h u ) &post-word


FA <- &amp;cmavo ( f a i / f a / f e / f o / f u / f i h a / f i ) &amp;post-word
FA <- &cmavo ( f a i / f a / f e / f o / f u / f i h a / f i ) &post-word


FAhA <- &amp;cmavo ( d u h a / b e h a / n e h u / v u h a / g a h u / t i h a / n i h a / c a h u / z u h a / r i h u / r u h u / r e h  o / t e h e / b u h u / n e h a / p a h o / n e h i / t o h o / z o h i / z e h o / z o h a / f a h a ) &amp;post-word
FAhA <- &cmavo ( d u h a / b e h a / n e h u / v u h a / g a h u / t i h a / n i h a / c a h u / z u h a / r i h u / r u h u / r e h  o / t e h e / b u h u / n e h a / p a h o / n e h i / t o h o / z o h i / z e h o / z o h a / f a h a ) &post-word


FAhO <- &amp;cmavo ( f a h o ) &amp;post-word
FAhO <- &cmavo ( f a h o ) &post-word


FEhE <- &amp;cmavo ( f e h e ) &amp;post-word
FEhE <- &cmavo ( f e h e ) &post-word


FEhU <- &amp;cmavo ( f e h u ) &amp;post-word
FEhU <- &cmavo ( f e h u ) &post-word


FIhO <- &amp;cmavo ( f i h o ) &amp;post-word
FIhO <- &cmavo ( f i h o ) &post-word


FOI <- &amp;cmavo ( f o i ) &amp;post-word
FOI <- &cmavo ( f o i ) &post-word


FUhA <- &amp;cmavo ( f u h a ) &amp;post-word
FUhA <- &cmavo ( f u h a ) &post-word


FUhE <- &amp;cmavo ( f u h e ) &amp;post-word
FUhE <- &cmavo ( f u h e ) &post-word


FUhO <- &amp;cmavo ( f u h o ) &amp;post-word
FUhO <- &cmavo ( f u h o ) &post-word


GA <- &amp;cmavo ( g e h i / g e /  g o / g a / g u ) &amp;post-word
GA <- &cmavo ( g e h i / g e /  g o / g a / g u ) &post-word


GAhO <- &amp;cmavo ( k e h i / g a h o ) &amp;post-word
GAhO <- &cmavo ( k e h i / g a h o ) &post-word


GEhU <- &amp;cmavo ( g e h u ) &amp;post-word
GEhU <- &cmavo ( g e h u ) &post-word


GI <- &amp;cmavo ( g i ) &amp;post-word
GI <- &cmavo ( g i ) &post-word


GIhA <- &amp;cmavo ( g i h e / g i h i / g i h o / g i h a / g i h u ) &amp;post-word
GIhA <- &cmavo ( g i h e / g i h i / g i h o / g i h a / g i h u ) &post-word


GOI <- &amp;cmavo ( n o h u / n e / g o i / p o h u / p e / p o h e / p o ) &amp;post-word
GOI <- &cmavo ( n o h u / n e / g o i / p o h u / p e / p o h e / p o ) &post-word


GOhA <- &amp;cmavo ( m o / n e i / g o h u / g o h o / g o h i / n o h a / g o h e / g o h a / d u / b u h a / b u h e / b u h i / c o h  e ) &amp;post-word
GOhA <- &cmavo ( m o / n e i / g o h u / g o h o / g o h i / n o h a / g o h e / g o h a / d u / b u h a / b u h e / b u h i / c o h  e ) &post-word


GUhA <- &amp;cmavo ( g u h e / g u h i / g u h o / g u h a / g u h u ) &amp;post-word
GUhA <- &cmavo ( g u h e / g u h i / g u h o / g u h a / g u h u ) &post-word


I <- &amp;cmavo ( i ) &amp;post-word
I <- &cmavo ( i ) &post-word


JA <- &amp;cmavo ( j e h i / j e /  j o / j a / j u ) &amp;post-word
JA <- &cmavo ( j e h i / j e /  j o / j a / j u ) &post-word


JAI <- &amp;cmavo ( j a i ) &amp;post-word
JAI <- &cmavo ( j a i ) &post-word


JOhI <- &amp;cmavo ( j o h i ) &amp;post-word
JOhI <- &cmavo ( j o h i ) &post-word


JOI <- &amp;cmavo ( f a h u / p i h u / j o i / c e h o / c e / j o h u / k u h a / j o h e / j u h e ) &amp;post-word
JOI <- &cmavo ( f a h u / p i h u / j o i / c e h o / c e / j o h u / k u h a / j o h e / j u h e ) &post-word


KE <- &amp;cmavo ( k e ) &amp;post-word
KE <- &cmavo ( k e ) &post-word


KEhE <- &amp;cmavo ( k e h e ) &amp;post-word
KEhE <- &cmavo ( k e h e ) &post-word


KEI <- &amp;cmavo ( k e i ) &amp;post-word
KEI <- &cmavo ( k e i ) &post-word


KI <- &amp;cmavo ( k i ) &amp;post-word
KI <- &cmavo ( k i ) &post-word


KOhA <- &amp;cmavo ( d a h u / d a h e / d i h u / d i h e / d e h u / d e h e / d e i / d o h i / m i h o / m a h a / m i h a / d o h o  / k o h a / f o h u / k o h e / k o h i / k o h o / k o h u / f o h a / f o h e / f o h i / f o h o / v o h a / v o h e / v o h i /  v o h o / v o h u / r u / r i / r a / t a / t u / t i / z i h o / k e h a / m a / z u h i / z o h e / c e h u / d a / d e / d i / k  o / m i / d o ) &amp;post-word
KOhA <- &cmavo ( d a h u / d a h e / d i h u / d i h e / d e h u / d e h e / d e i / d o h i / m i h o / m a h a / m i h a / d o h o  / k o h a / f o h u / k o h e / k o h i / k o h o / k o h u / f o h a / f o h e / f o h i / f o h o / v o h a / v o h e / v o h i /  v o h o / v o h u / r u / r i / r a / t a / t u / t i / z i h o / k e h a / m a / z u h i / z o h e / c e h u / d a / d e / d i / k  o / m i / d o ) &post-word


KU <- &amp;cmavo ( k u ) &amp;post-word
KU <- &cmavo ( k u ) &post-word


KUhE <- &amp;cmavo ( k u h e ) &amp;post-word
KUhE <- &cmavo ( k u h e ) &post-word


KUhO <- &amp;cmavo ( k u h o ) &amp;post-word
KUhO <- &cmavo ( k u h o ) &post-word


LA <- &amp;cmavo ( l a i / l a h i / l a ) &amp;post-word
LA <- &cmavo ( l a i / l a h i / l a ) &post-word


LAU <- &amp;cmavo ( c e h a / l a u / z a i / t a u ) &amp;post-word
LAU <- &cmavo ( c e h a / l a u / z a i / t a u ) &post-word


LAhE <- &amp;cmavo ( t u h a / l u h a / l u h o / l a h e / v u h i / l u h i / l u h e ) &amp;post-word
LAhE <- &cmavo ( t u h a / l u h a / l u h o / l a h e / v u h i / l u h i / l u h e ) &post-word


LE <- &amp;cmavo ( l e i / l o i / l e h i / l o h i / l e h e / l o h e / l o / l e ) &amp;post-word
LE <- &cmavo ( l e i / l o i / l e h i / l o h i / l e h e / l o h e / l o / l e ) &post-word


LEhU <- &amp;cmavo ( l e h u ) &amp;post-word
LEhU <- &cmavo ( l e h u ) &post-word


LI <- &amp;cmavo ( m e h o / l i ) &amp;post-word
LI <- &cmavo ( m e h o / l i ) &post-word


LIhU <- &amp;cmavo ( l i h u ) &amp;post-word
LIhU <- &cmavo ( l i h u ) &post-word


LOhO <- &amp;cmavo ( l o h o ) &amp;post-word
LOhO <- &cmavo ( l o h o ) &post-word


LOhU <- &amp;cmavo ( l o h u ) &amp;post-word
LOhU <- &cmavo ( l o h u ) &post-word


LU <- &amp;cmavo ( l u ) &amp;post-word
LU <- &cmavo ( l u ) &post-word


LUhU <- &amp;cmavo ( l u h u ) &amp;post-word
LUhU <- &cmavo ( l u h u ) &post-word


MAhO <- &amp;cmavo ( m a h o ) &amp;post-word
MAhO <- &cmavo ( m a h o ) &post-word


MAI <- &amp;cmavo ( m o h o / m a i ) &amp;post-word
MAI <- &cmavo ( m o h o / m a i ) &post-word


ME <- &amp;cmavo ( m e ) &amp;post-word
ME <- &cmavo ( m e ) &post-word


MEhU <- &amp;cmavo ( m e h u ) &amp;post-word
MEhU <- &cmavo ( m e h u ) &post-word


MOhE <- &amp;cmavo ( m o h e ) &amp;post-word
MOhE <- &cmavo ( m o h e ) &post-word


MOhI <- &amp;cmavo ( m o h i ) &amp;post-word
MOhI <- &cmavo ( m o h i ) &post-word


MOI <- &amp;cmavo ( m e i / m o i / s i h e / c u h o / v a h e ) &amp;post-word
MOI <- &cmavo ( m e i / m o i / s i h e / c u h o / v a h e ) &post-word


NA <- &amp;cmavo ( j a h a / n a ) &amp;post-word
NA <- &cmavo ( j a h a / n a ) &post-word


NAI <- &amp;cmavo ( n a i ) &amp;post-word
NAI <- &cmavo ( n a i ) &post-word


NAhE <- &amp;cmavo ( t o h e / j e h a / n a h e / n o h e ) &amp;post-word
NAhE <- &cmavo ( t o h e / j e h a / n a h e / n o h e ) &post-word


NAhU <- &amp;cmavo ( n a h u ) &amp;post-word
NAhU <- &cmavo ( n a h u ) &post-word


NIhE <- &amp;cmavo ( n i h e ) &amp;post-word
NIhE <- &cmavo ( n i h e ) &post-word


NIhO <- &amp;cmavo ( n i h o / n o h i ) &amp;post-word
NIhO <- &cmavo ( n i h o / n o h i ) &post-word


NOI <- &amp;cmavo ( v o i / n o i / p o i ) &amp;post-word
NOI <- &cmavo ( v o i / n o i / p o i ) &post-word


NU <- &amp;cmavo ( n i / d u h u / s i h o / n u / l i h i / k a / j e i / s u h u / z u h o / m u h e / p u h u / z a h i ) &amp;post-word
NU <- &cmavo ( n i / d u h u / s i h o / n u / l i h i / k a / j e i / s u h u / z u h o / m u h e / p u h u / z a h i ) &post-word


NUhA <- &amp;cmavo ( n u h a ) &amp;post-word
NUhA <- &cmavo ( n u h a ) &post-word


NUhI <- &amp;cmavo ( n u h i ) &amp;post-word
NUhI <- &cmavo ( n u h i ) &post-word


NUhU <- &amp;cmavo ( n u h u ) &amp;post-word
NUhU <- &cmavo ( n u h u ) &post-word


PA <- &amp;cmavo ( d a u / f e i / g a i / j a u / r e i / v a i / p i h e / p i /  f i h u / z a h u / m e h i / n i h u / k i h o / c e h i / m a h u / r a h e / d a h a / s o h a / j i h i / s u h o / s u h e / r o / r a u / s o h u / s o h i / s o h e / s o h o / m o h a / d u h e / t e h o / k a h o / c i h i / t u h o / x o / p a i / n o h o / n o / p a / r e / c i / v o / m u / x a / z e / b i / s o / digit ) &amp;post-word
PA <- &cmavo ( d a u / f e i / g a i / j a u / r e i / v a i / p i h e / p i /  f i h u / z a h u / m e h i / n i h u / k i h o / c e h i / m a h u / r a h e / d a h a / s o h a / j i h i / s u h o / s u h e / r o / r a u / s o h u / s o h i / s o h e / s o h o / m o h a / d u h e / t e h o / k a h o / c i h i / t u h o / x o / p a i / n o h o / n o / p a / r e / c i / v o / m u / x a / z e / b i / s o / digit ) &post-word


PEhE <- &amp;cmavo ( p e h e ) &amp;post-word
PEhE <- &cmavo ( p e h e ) &post-word


PEhO <- &amp;cmavo ( p e h o ) &amp;post-word
PEhO <- &cmavo ( p e h o ) &post-word


PU <- &amp;cmavo ( b a / p u / c a ) &amp;post-word
PU <- &cmavo ( b a / p u / c a ) &post-word


RAhO <- &amp;cmavo ( r a h o ) &amp;post-word
RAhO <- &cmavo ( r a h o ) &post-word


ROI <- &amp;cmavo ( r e h u / r o i ) &amp;post-word
ROI <- &cmavo ( r e h u / r o i ) &post-word


SA <- &amp;cmavo ( s a ) &amp;post-word
SA <- &cmavo ( s a ) &post-word


SE <- &amp;cmavo ( s e / t e / v e / x e ) &amp;post-word
SE <- &cmavo ( s e / t e / v e / x e ) &post-word


SEI <- &amp;cmavo ( s e i / t i h o ) &amp;post-word
SEI <- &cmavo ( s e i / t i h o ) &post-word


SEhU <- &amp;cmavo ( s e h u ) &amp;post-word
SEhU <- &cmavo ( s e h u ) &post-word


SI <- &amp;cmavo ( s i ) &amp;post-word
SI <- &cmavo ( s i ) &post-word


SOI <- &amp;cmavo ( s o i ) &amp;post-word
SOI <- &cmavo ( s o i ) &post-word


SU <- &amp;cmavo ( s u ) &amp;post-word
SU <- &cmavo ( s u ) &post-word


TAhE <- &amp;cmavo ( r u h i / t a h e / d i h i / n a h o ) &amp;post-word
TAhE <- &cmavo ( r u h i / t a h e / d i h i / n a h o ) &post-word


TEhU <- &amp;cmavo ( t e h u ) &amp;post-word
TEhU <- &cmavo ( t e h u ) &post-word


TEI <- &amp;cmavo ( t e i ) &amp;post-word
TEI <- &cmavo ( t e i ) &post-word


TO <- &amp;cmavo ( t o h i / t o ) &amp;post-word
TO <- &cmavo ( t o h i / t o ) &post-word


TOI <- &amp;cmavo ( t o i ) &amp;post-word
TOI <- &cmavo ( t o i ) &post-word


TUhE <- &amp;cmavo ( t u h e ) &amp;post-word
TUhE <- &cmavo ( t u h e ) &post-word


TUhU <- &amp;cmavo ( t u h u ) &amp;post-word
TUhU <- &cmavo ( t u h u ) &post-word


UI <- &amp;cmavo ( i h a / i e / a h e / u h i / i h o / i h e / a h a / i a / o h i / o h e / e h e / o i / u o / e h i / u h o / a u /  u a / a h i / i h u / i i / u h a / u i / a h o / a i / a h u / i u / e i / o h o / e h a / u u / o h a / o h u / u h u / e h o / i  o / e h u / u e / i h i / u h e / b a h a / j a h o / c a h e / s u h a / t i h e / k a h u / s e h o / z a h a / p e h i / r u h a  / j u h a / t a h o / r a h u / l i h a / b a h u / m u h a / d o h a / t o h u / v a h i / p a h e / z u h u / s a h e / l a h a /  k e h u / s a h u / d a h i / j e h u / s a h a / k a u / t a h u / n a h i / j o h a / b i h u / l i h o / p a u / m i h u / k u h  i / j i h a / s i h a / p o h o / p e h a / r o h i / r o h e / r o h o / r o h u / r o h a / r e h e / l e h o / j u h o / f u h i  / d a i / g a h i / z o h o / b e h u / r i h e / s e h i / s e h a / v u h e / k i h a / x u / g e h e / b u h o ) &amp;post-word
UI <- &cmavo ( i h a / i e / a h e / u h i / i h o / i h e / a h a / i a / o h i / o h e / e h e / o i / u o / e h i / u h o / a u /  u a / a h i / i h u / i i / u h a / u i / a h o / a i / a h u / i u / e i / o h o / e h a / u u / o h a / o h u / u h u / e h o / i  o / e h u / u e / i h i / u h e / b a h a / j a h o / c a h e / s u h a / t i h e / k a h u / s e h o / z a h a / p e h i / r u h a  / j u h a / t a h o / r a h u / l i h a / b a h u / m u h a / d o h a / t o h u / v a h i / p a h e / z u h u / s a h e / l a h a /  k e h u / s a h u / d a h i / j e h u / s a h a / k a u / t a h u / n a h i / j o h a / b i h u / l i h o / p a u / m i h u / k u h  i / j i h a / s i h a / p o h o / p e h a / r o h i / r o h e / r o h o / r o h u / r o h a / r e h e / l e h o / j u h o / f u h i  / d a i / g a h i / z o h o / b e h u / r i h e / s e h i / s e h a / v u h e / k i h a / x u / g e h e / b u h o ) &post-word


VA <- &amp;cmavo ( v i / v a / v u ) &amp;post-word
VA <- &cmavo ( v i / v a / v u ) &post-word


VAU <- &amp;cmavo ( v a u ) &amp;post-word
VAU <- &cmavo ( v a u ) &post-word


VEI <- &amp;cmavo ( v e i ) &amp;post-word
VEI <- &cmavo ( v e i ) &post-word


VEhO <- &amp;cmavo ( v e h o ) &amp;post-word
VEhO <- &cmavo ( v e h o ) &post-word


VUhU <- &amp;cmavo ( g e h a / f u h u / p i h i / f e h i / v u h u / s u h i / j u h u / g e i / p a h i / f a h i / t e h a / c u h a  / v a h a / n e h o / d e h o / f e h a / s a h o / r e h a / r i h o / s a h i / p i h a / s i h i ) &amp;post-word
VUhU <- &cmavo ( g e h a / f u h u / p i h i / f e h i / v u h u / s u h i / j u h u / g e i / p a h i / f a h i / t e h a / c u h a  / v a h a / n e h o / d e h o / f e h a / s a h o / r e h a / r i h o / s a h i / p i h a / s i h i ) &post-word


VEhA <- &amp;cmavo ( v e h u / v e h a / v e h i / v e h e ) &amp;post-word
VEhA <- &cmavo ( v e h u / v e h a / v e h i / v e h e ) &post-word


VIhA <- &amp;cmavo ( v i h i / v i h a / v i h u / v i h e ) &amp;post-word
VIhA <- &cmavo ( v i h i / v i h a / v i h u / v i h e ) &post-word


VUhO <- &amp;cmavo ( v u h o ) &amp;post-word
VUhO <- &cmavo ( v u h o ) &post-word


XI <- &amp;cmavo ( x i ) &amp;post-word
XI <- &cmavo ( x i ) &post-word


Y <- &amp;cmavo ( y+ ) &amp;post-word
Y <- &cmavo ( y+ ) &post-word


ZAhO <- &amp;cmavo ( c o h i / p u h o / c o h u / m o h u / c a h o / c o h a / d e h a / b a h o / d i h a / z a h o ) &amp;post-word
ZAhO <- &cmavo ( c o h i / p u h o / c o h u / m o h u / c a h o / c o h a / d e h a / b a h o / d i h a / z a h o ) &post-word


ZEhA <- &amp;cmavo ( z e h u / z e h a / z e h i / z e h e ) &amp;post-word
ZEhA <- &cmavo ( z e h u / z e h a / z e h i / z e h e ) &post-word


ZEI <- &amp;cmavo ( z e i ) &amp;post-word
ZEI <- &cmavo ( z e i ) &post-word


ZI <- &amp;cmavo ( z u / z a / z i ) &amp;post-word
ZI <- &cmavo ( z u / z a / z i ) &post-word


ZIhE <- &amp;cmavo ( z i h e ) &amp;post-word
ZIhE <- &cmavo ( z i h e ) &post-word


ZO <- &amp;cmavo ( z o ) &amp;post-word
ZO <- &cmavo ( z o ) &post-word


ZOI <- &amp;cmavo ( z o i / l a h o ) &amp;post-word
ZOI <- &cmavo ( z o i / l a h o ) &post-word


ZOhU <- &amp;cmavo ( z o h u ) &amp;post-word
ZOhU <- &cmavo ( z o h u ) &post-word
</pre>


</code>
[[Category:BPFK Section]]

Latest revision as of 06:47, 11 June 2015

; This is a Parsing Expression Grammar for the morphology of Lojban.%%%
; See [http://www.pdos.lcs.mit.edu/~baford/packrat/] %%%
; %%%
; All rules have the form %%%
; %%%
; 	name <- peg-expression%%%
; %%%
; which means that the grammatical construct "name" is parsed using%%%
; "peg-expression".  %%%
; %%%
; 1)  Concatenation is expressed by juxtaposition with no operator symbol.%%%
; 2)  / represents *ORDERED* alternation (choice).  If the first%%%
;     option succeeds, the others will never be checked.%%%
; 3)  ? indicates that the element to the left is optional.%%%
; 4)  * represents zero-or-more repetition of the construct to the left.%%%
; 5)  + represents one-or-more repetition of the construct to the left.%%%
; 6)  () serves to indicate the grouping of the other operators.%%%
; 7)  & indicates that the element to the right must follow (but the%%%
;     marked element itself does not absorb anything).%%%
; 8)  ! indicates that the element to the right must not follow (the%%%
;     marked element itself does not absorb anything).%%%
; 9)  . represents any character.%%%
; 10) ' ' or " " represents a literal string.%%%
; 11) [] represents a character class.   %%% 
;%%%
; Repetitions are greedy: they grab as much as they can and do not backtrack.%%%
;%%%
;%%%
; --- GRAMMAR ---%%%
; This grammar classifies words by their morphological class (cmene,%%%
; gismu, lujvo, fuhivla, cmavo, and non-lojban-word). %%%
; %%%
;The final section sorts cmavo into grammatical classes (A, BAI, BAhE, ..., ZOhU).%%%
;%%%
; mi'e ((xorxes))%%%

;-------------------------------------------------------------------%%%

; Entry point: an optional leading pause, then zero or more words, each
; optionally followed by a pause.
words <- pause? (word pause?)*

; Ordered choice: a word is tried as a lojban-word first and only falls back
; to non-lojban-word if that fails.
word <- lojban-word / non-lojban-word

; Ordered choice: cmene is tried first, then cmavo, then brivla.
lojban-word <- cmene / cmavo / brivla

; Ordered choice: gismu is tried first, then fuhivla, then lujvo.
brivla <- gismu / fuhivla / lujvo

;-------------------------------------------------------------------

; A cmene is a jbocme or, failing that, a zifcme.
cmene <- jbocme / zifcme

; zifcme: must not start with h. Consumes any run of nuclei, glides, h,
; digits, and consonants that are not directly followed by a pause, and must
; end in a consonant that IS followed by a pause (lookahead only).
zifcme <- !h (nucleus / glide / h / consonant !pause / digit)* consonant &pause

; jbocme: the input must satisfy zifcme (lookahead), and in addition must be
; coverable by any-syllable / digit units all the way up to the pause.
jbocme <- &zifcme (any-syllable / digit)* &pause 

; The rules below are commented out (disabled). They are alternative
; definitions of cmene together with the helper rules those alternatives use.
;cmene <- !h &consonant-final coda? (any-syllable / digit)* &pause

;consonant-final <- (non-space &non-space)* consonant &pause

;cmene <- !h cmene-syllable* &consonant coda? consonantal-syllable* onset &pause

;cmene-syllable <- !doi-la-lai-lahi coda? consonantal-syllable* onset nucleus / digit

;doi-la-lai-lahi <- (d o i / l a (h? i)?) !h !nucleus

;-------------------------------------------------------------------

; cmavo: a cmavo-form that is not the start of a cmene and not the start of a
; CVCy-lujvo, and that is followed by post-word (lookahead only).
cmavo <- !cmene !CVCy-lujvo cmavo-form &post-word 

; CVCy-lujvo: used by cmavo above as a negative lookahead. Either an
; unstressed CVC rafsi + y (+ optional h) + any initial rafsi + a brivla-core,
; or a stressed CVC rafsi + y + a short final rafsi.
CVCy-lujvo <- CVC-rafsi y h? initial-rafsi* brivla-core / stressed-CVC-rafsi y short-final-rafsi

; cmavo-form, three ordered alternatives:
;   1) no leading h and no leading consonant cluster; an onset, any number of
;      "nucleus h" pairs, then a last nucleus that either is not at a
;      `stressed` position or is not followed by a cluster;
;   2) a run of one or more y;
;   3) a single digit.
cmavo-form <- !h !cluster onset (nucleus h)* (!stressed nucleus / nucleus !cluster) / y+ / digit

;-------------------------------------------------------------------

; lujvo: must not be parseable here as a gismu, fuhivla, or cmavo; consists of
; zero or more initial rafsi followed by a brivla-core.
lujvo <- !gismu !fuhivla !cmavo initial-rafsi* brivla-core

; brivla-core: the closing part of a lujvo. Ordered choice between a whole
; fuhivla, a whole gismu, a CVV-final-rafsi, or a stressed initial rafsi
; followed by a short final rafsi.
brivla-core <- fuhivla / gismu / CVV-final-rafsi / stressed-initial-rafsi short-final-rafsi 

; The stressed counterpart of initial-rafsi (without the extra lookaheads).
stressed-initial-rafsi <- stressed-extended-rafsi / stressed-y-rafsi / stressed-y-less-rafsi

; initial-rafsi: a y-less-rafsi is only accepted when no any-extended-rafsi
; matches at its start, and none matches immediately after it.
initial-rafsi <- extended-rafsi / y-rafsi / !any-extended-rafsi y-less-rafsi !any-extended-rafsi

; Helper for the lookaheads in initial-rafsi.
any-extended-rafsi <- fuhivla / extended-rafsi / stressed-extended-rafsi

;-------------------------------------------------------------------

; fuhivla: a fuhivla-head, then the stressed syllable, any number of
; consonantal syllables, and the final syllable.
fuhivla <- fuhivla-head stressed-syllable consonantal-syllable* final-syllable

; Extended rafsi come in stressed and unstressed variants, each built either
; on a brivla-head or on a fuhivla-head.
stressed-extended-rafsi <- stressed-brivla-rafsi / stressed-fuhivla-rafsi 

extended-rafsi <- brivla-rafsi / fuhivla-rafsi

; Must begin with an unstressed syllable (lookahead); brivla-head, a stressed
; syllable, then h y.
stressed-brivla-rafsi <- &unstressed-syllable brivla-head stressed-syllable h y

; The lookahead requires at least two syllables (consonantal syllables may
; sit between them); then brivla-head, h y, and an optional trailing h.
brivla-rafsi <- &(syllable consonantal-syllable* syllable) brivla-head h y h?

; fuhivla-head, a stressed syllable, then an onset (not h) and y.
stressed-fuhivla-rafsi <- fuhivla-head stressed-syllable !h onset y 

; Must begin with an unstressed syllable (lookahead); fuhivla-head, an onset
; (not h), y, and an optional trailing h.
fuhivla-rafsi <- &unstressed-syllable fuhivla-head !h onset y h?

; A brivla-head at whose start no rafsi-string matches.
fuhivla-head <- !rafsi-string brivla-head

; brivla-head: rejected if a cmavo or a slinkuhi matches here, or if it starts
; with h; must start with an onset (lookahead). Consumes zero or more
; unstressed syllables, so it can match without consuming anything.
brivla-head <- !cmavo !slinkuhi !h &onset unstressed-syllable*

; slinkuhi: no rafsi-string matches at this position, but one does match
; after skipping a single consonant.
slinkuhi <- !rafsi-string consonant rafsi-string 

; rafsi-string: zero or more y-less rafsi followed by exactly one of the
; listed endings (ordered choice).
rafsi-string <- y-less-rafsi* (gismu / CVV-final-rafsi / stressed-y-less-rafsi short-final-rafsi / y-rafsi / stressed-y-rafsi / stressed-y-less-rafsi? initial-pair y / hy-rafsi / stressed-hy-rafsi)

;-------------------------------------------------------------------

; gismu: either initial-pair + stressed vowel, or consonant + stressed vowel +
; consonant; then a consonant and a vowel at a position where final-syllable
; matches (lookahead); followed by post-word (lookahead).
gismu <- (initial-pair stressed-vowel / consonant stressed-vowel consonant) &final-syllable consonant vowel &post-word

; consonant + stressed vowel + h + vowel, where the vowel sits at a position
; accepted by final-syllable; followed by post-word (lookahead).
CVV-final-rafsi <- consonant stressed-vowel h &final-syllable vowel &post-word

; Must start where final-syllable matches; consonant + diphthong, or
; initial-pair + vowel; followed by post-word (lookahead).
short-final-rafsi <- &final-syllable (consonant diphthong / initial-pair vowel) &post-word

; --- Stressed rafsi shapes ---
stressed-hy-rafsi <- (long-rafsi stressed-vowel / stressed-CCV-rafsi / stressed-CVV-rafsi) h y

stressed-y-rafsi <- (stressed-long-rafsi / stressed-CVC-rafsi) y

; The CVC alternative is only accepted when it is not followed by y.
stressed-y-less-rafsi <- stressed-CVC-rafsi !y / stressed-CCV-rafsi / stressed-CVV-rafsi

stressed-long-rafsi <- initial-pair stressed-vowel consonant / consonant stressed-vowel consonant consonant

stressed-CVC-rafsi <- consonant stressed-vowel consonant 

stressed-CCV-rafsi <- initial-pair stressed-vowel 

; In the V'V form only the second vowel is the stressed one.
stressed-CVV-rafsi <- consonant (unstressed-vowel h stressed-vowel / stressed-diphthong) r-hyphen? 


; --- Unstressed rafsi shapes (these additionally allow a trailing h after y) ---
hy-rafsi <- (long-rafsi vowel / CCV-rafsi / CVV-rafsi) h y h?

y-rafsi <- (long-rafsi / CVC-rafsi) y h?

; Rejected wherever any y-rafsi / hy-rafsi variant (stressed or not) matches;
; otherwise a CVC, CCV, or CVV rafsi that is not followed by h.
y-less-rafsi <- !y-rafsi !stressed-y-rafsi !hy-rafsi !stressed-hy-rafsi (CVC-rafsi / CCV-rafsi / CVV-rafsi) !h 

long-rafsi <- initial-pair unstressed-vowel consonant / consonant unstressed-vowel consonant consonant 

CVC-rafsi <- consonant unstressed-vowel consonant

CCV-rafsi <- initial-pair unstressed-vowel

CVV-rafsi <- consonant (unstressed-vowel h unstressed-vowel / unstressed-diphthong) r-hyphen?

; r-hyphen: an r that is followed by a consonant, or an n that is followed by r.
r-hyphen <- r &consonant / n &r

;-------------------------------------------------------------------

; final-syllable: an onset and a nucleus, where the nucleus position is
; neither y nor `stressed`; what follows must not be a cmene and must satisfy
; post-word (lookahead).
final-syllable <-  onset !y !stressed nucleus !cmene &post-word

; A unit counts as stressed in one of two ways: it starts at a `stressed`
; position (explicit uppercase vowel), or it is followed by `stress`.
stressed-syllable <- &stressed syllable / syllable &stress

stressed-diphthong <- &stressed diphthong / diphthong &stress

stressed-vowel <- &stressed vowel / vowel &stress

; The unstressed forms require that neither stress condition holds.
; A consonantal-syllable is always accepted as an unstressed syllable.
unstressed-syllable <- !stressed syllable !stress / consonantal-syllable

unstressed-diphthong <- !stressed diphthong !stress

unstressed-vowel <- !stressed vowel !stress

; stress: what must follow a unit for it to count as stressed without
; explicit marking: any consonants, optional h, optional y, then one
; syllable and a pause.
stress <- consonant* h? y? syllable pause

; stressed: an onset followed (after optional commas) by an uppercase vowel.
stressed <- onset comma* [AEIOU]

; any-syllable: unlike `syllable`, the nucleus here may be y.
any-syllable <- onset nucleus coda? / consonantal-syllable 

; syllable: onset, a nucleus that does not start with y, optional coda.
syllable <- onset !y nucleus coda?

; consonantal-syllable: a consonant followed by a coda that begins with a
; syllabic consonant (checked by lookahead).
consonantal-syllable <- consonant &syllabic coda

; coda: either a consonant at a position where no any-syllable starts but one
; does start right after it; or an optional syllabic plus optional consonant
; directly before a pause.
coda <- !any-syllable consonant &any-syllable / syllabic? consonant? &pause 

onset <-  h / glide / initial

; nucleus: vowel is tried before diphthong; a y nucleus must not be followed
; by another nucleus.
nucleus <- vowel / diphthong / y !nucleus

;-----------------------------------------------------------------

; glide: an i or u that is immediately followed by a nucleus.
glide <- (i / u) &nucleus

; diphthong: ai, au, ei, or oi, not followed by a repeat of its second vowel
; and not followed by any further nucleus.
diphthong <- (a i !i / a u !u / e i !i / o i !i) !nucleus

; vowel: a single vowel letter not followed by another nucleus.
vowel <- (a / e / i / o / u) !nucleus

; Vowel letters: any number of leading commas, then the letter in either case.
a <- comma* [aA] 

e <- comma* [eE] 

i <- comma* [iI] 

o <- comma* [oO] 

u <- comma* [uU] 

y <- comma* [yY] 

;-------------------------------------------------------------------

; cluster: two or more consonants in a row.
cluster <- consonant consonant+

; initial-pair: exactly two consonants (no third one follows) at a position
; where `initial` matches.
initial-pair <- &initial consonant consonant !consonant

; initial: an affricate, or an optional sibilant + optional other + optional
; liquid; not followed by a consonant or a glide. Every part of the second
; alternative is optional, so this rule can match the empty string.
initial <- (affricate / sibilant? other? liquid?) !consonant !glide

affricate <- t c / t s / d j / d z

liquid <- l / r 

; In `other`, t and d are rejected before l, and n before a liquid.
other <- p / t !l / k / f / x / b / d !l / g / v / m / n !liquid 

; In `sibilant`, s is rejected before x; j and z are rejected before n or a liquid.
sibilant <- c / s !x / (j / z) !n !liquid

consonant <- voiced / unvoiced / syllabic

syllabic <- l / m / n / r

voiced <- b / d / g / j / v / z

unvoiced <- c / f / k / p / s / t / x

; Consonant letters. Each rule accepts any number of leading commas and the
; letter in either case, and rejects it (negative lookahead) when it is
; followed by h, by a glide, or by the same consonant again.
; b d g v j z additionally reject a following unvoiced consonant;
; s c x k f p t additionally reject a following voiced consonant.
; Extra per-letter exclusions: m !z, n !affricate, j !z, z !j, s !c,
; c !s !x, x !c !k, k !x.
l <- comma* [lL] !h !glide !l

m <- comma* [mM] !h !glide !m !z

n <- comma* [nN] !h !glide !n !affricate

r <- comma* [rR] !h !glide !r

b <- comma* [bB] !h !glide !b !unvoiced

d <- comma* [dD] !h !glide !d !unvoiced

g <- comma* [gG] !h !glide !g !unvoiced

v <- comma* [vV] !h !glide !v !unvoiced

j <- comma* [jJ] !h !glide !j !z !unvoiced

z <- comma* [zZ] !h !glide !z !j !unvoiced

s <- comma* [sS] !h !glide !s !c !voiced

c <- comma* [cC] !h !glide !c !s !x !voiced

x <- comma* [xX] !h !glide !x !c !k !voiced

k <- comma* [kK] !h !glide !k !x !voiced

f <- comma* [fF] !h !glide !f !voiced

p <- comma* [pP] !h !glide !p !voiced

t <- comma* [tT] !h !glide !t !voiced

; h: an apostrophe or a lowercase h (after optional commas), which must be
; followed by a nucleus.
h <- comma* ['h] &nucleus

;-------------------------------------------------------------------

; digit: one ASCII digit (after optional commas) not followed by h or a nucleus.
digit <- comma* [0123456789] !h !nucleus

; post-word: what may follow a word: a pause, or another lojban-word that
; does not begin with a nucleus.
post-word <- pause / !nucleus lojban-word

; pause: optional commas then one or more space-chars, or end of input.
pause <- comma* space-char+ / EOF

; EOF: optional commas, then no further character.
EOF <- comma* !.

comma <- [,]

; non-lojban-word: only where lojban-word fails; consumes a run of one or
; more non-space characters.
non-lojban-word <- !lojban-word non-space+

; NOTE(review): the meaning of the NORATS prefix on the next two rules is not
; defined anywhere in this file; presumably a marker for a downstream tool.
; Confirm before changing.
NORATS non-space <- !space-char .

; space-char: period, tab, newline, carriage return, '?', '!', or U+0020.
NORATS space-char <- [.\t\n\r?!\u0020]

;-------------------------------------------------------------------

; spaces: initial-spaces, but rejected at a position where Y matches.
spaces <- !Y initial-spaces

; initial-spaces: one or more items, each either a space-char (after optional
; commas) or a Y that is not the start of a ybu, optionally followed by EOF;
; or EOF alone.
initial-spaces <- (comma* space-char / !ybu Y)+ EOF? / EOF

; ybu: Y, any number of space-chars, then BU.
ybu <- Y space-char* BU

;-------------------------------------------------------------------

; cmavo class rules. Each rule has the shape
;     NAME <- &cmavo ( form / form / ... ) &post-word
; i.e. the input must parse as a cmavo (lookahead only), then one of the
; listed letter sequences is consumed, and post-word must follow (lookahead
; only). Letters are separate tokens so each goes through its letter rule.
A <- &cmavo ( a / e / j i / o / u ) &post-word

BAI <- &cmavo ( d u h o / s i h u / z a u / k i h i / d u h i / c u h u / t u h i / t i h u / d i h o / j i h u / r i h a / n i h i / m u h i / k i h u / v a h u / k o i / c a h i / t a h i / p u h e / j a h i / k a i / b a i / f i h e / d e h i / c i h o / m a u / m u h u / r i h i / r a h i / k a h a / p a h u / p a h a / l e h a / k u h u / t a i / b a u / m a h i / c i h e / f a u / p o h i / c a u / m a h e / c i h u / r a h a / p u h a / l i h e / l a h u / b a h i / k a h i / s a u / f a h e / b e h i / t i h i / j a h e / g a h a / v a h o / j i h o / m e h a / d o h e / j i h e / p i h o / g a u / z u h e / m e h e / r a i ) &post-word

BAhE <- &cmavo ( b a h e / z a h e ) &post-word

BE <- &cmavo ( b e ) &post-word

BEI <- &cmavo ( b e i ) &post-word

BEhO <- &cmavo ( b e h o ) &post-word

BIhE <- &cmavo ( b i h e ) &post-word

BIhI <- &cmavo ( m i h i / b i h o / b i h i ) &post-word

BO <- &cmavo ( b o ) &post-word

BOI <- &cmavo ( b o i ) &post-word

BU <- &cmavo ( b u ) &post-word

; BY first tries ybu (Y, optional space-chars, BU); only if that fails does
; it use the usual &cmavo ( ... ) &post-word pattern. The &cmavo / &post-word
; lookaheads belong to the second alternative only.
BY <- ybu / &cmavo ( j o h o / r u h o / g e h o / j e h o / l o h a / n a h a / s e h e / t o h a / g a h e / y h y /  b y / c y / d y / f y / g y / j y / k y / l y / m y / n y / p y / r y / s y / t y / v y / x y / z y ) &post-word

; cmavo classes CAhA .. FUhO. Each rule: the input must parse as a cmavo
; (lookahead), one listed form is consumed, and post-word must follow.
CAhA <- &cmavo ( c a h a / p u h i / n u h o / k a h e ) &post-word

CAI <- &cmavo ( p e i / c a i / c u h i / s a i / r u h e ) &post-word

CEI <- &cmavo ( c e i ) &post-word

CEhE <- &cmavo ( c e h e ) &post-word

CO <- &cmavo ( c o ) &post-word

COI <- &cmavo ( j u h i / c o i / f i h i / t a h a / m u h o / f e h o / c o h o / p e h u / k e h o / n u h e / r e h i / b e h e / j e h e / m i h e / k i h e / v i h o ) &post-word

CU <- &cmavo ( c u ) &post-word

CUhE <- &cmavo ( c u h e / n a u ) &post-word

DAhO <- &cmavo ( d a h o ) &post-word

DOI <- &cmavo ( d o i ) &post-word

DOhU <- &cmavo ( d o h u ) &post-word

; Note the order: "f a i" is listed before "f a", and "f i h a" before "f i".
; "/" is ordered choice (the first alternative that matches is taken), so
; keep longer forms ahead of their own prefixes when editing these lists.
FA <- &cmavo ( f a i / f a / f e / f o / f u / f i h a / f i ) &post-word

FAhA <- &cmavo ( d u h a / b e h a / n e h u / v u h a / g a h u / t i h a / n i h a / c a h u / z u h a / r i h u / r u h u / r e h  o / t e h e / b u h u / n e h a / p a h o / n e h i / t o h o / z o h i / z e h o / z o h a / f a h a ) &post-word

FAhO <- &cmavo ( f a h o ) &post-word

FEhE <- &cmavo ( f e h e ) &post-word

FEhU <- &cmavo ( f e h u ) &post-word

FIhO <- &cmavo ( f i h o ) &post-word

FOI <- &cmavo ( f o i ) &post-word

FUhA <- &cmavo ( f u h a ) &post-word

FUhE <- &cmavo ( f u h e ) &post-word

FUhO <- &cmavo ( f u h o ) &post-word

; cmavo classes GA .. KUhO. Each rule: the input must parse as a cmavo
; (lookahead), one listed form is consumed, and post-word must follow.
; Where one form is a prefix of another (e.g. "g e" / "g e h i"), the longer
; form is listed first because "/" is ordered choice.
GA <- &cmavo ( g e h i / g e /  g o / g a / g u ) &post-word

GAhO <- &cmavo ( k e h i / g a h o ) &post-word

GEhU <- &cmavo ( g e h u ) &post-word

GI <- &cmavo ( g i ) &post-word

GIhA <- &cmavo ( g i h e / g i h i / g i h o / g i h a / g i h u ) &post-word

GOI <- &cmavo ( n o h u / n e / g o i / p o h u / p e / p o h e / p o ) &post-word

GOhA <- &cmavo ( m o / n e i / g o h u / g o h o / g o h i / n o h a / g o h e / g o h a / d u / b u h a / b u h e / b u h i / c o h  e ) &post-word

GUhA <- &cmavo ( g u h e / g u h i / g u h o / g u h a / g u h u ) &post-word

I <- &cmavo ( i ) &post-word

JA <- &cmavo ( j e h i / j e /  j o / j a / j u ) &post-word

JAI <- &cmavo ( j a i ) &post-word

JOhI <- &cmavo ( j o h i ) &post-word

JOI <- &cmavo ( f a h u / p i h u / j o i / c e h o / c e / j o h u / k u h a / j o h e / j u h e ) &post-word

KE <- &cmavo ( k e ) &post-word

KEhE <- &cmavo ( k e h e ) &post-word

KEI <- &cmavo ( k e i ) &post-word

KI <- &cmavo ( k i ) &post-word

; In KOhA the two-letter forms (r u .. d o) come after all longer forms that
; start with the same letters.
KOhA <- &cmavo ( d a h u / d a h e / d i h u / d i h e / d e h u / d e h e / d e i / d o h i / m i h o / m a h a / m i h a / d o h o  / k o h a / f o h u / k o h e / k o h i / k o h o / k o h u / f o h a / f o h e / f o h i / f o h o / v o h a / v o h e / v o h i /  v o h o / v o h u / r u / r i / r a / t a / t u / t i / z i h o / k e h a / m a / z u h i / z o h e / c e h u / d a / d e / d i / k  o / m i / d o ) &post-word

KU <- &cmavo ( k u ) &post-word

KUhE <- &cmavo ( k u h e ) &post-word

KUhO <- &cmavo ( k u h o ) &post-word

; cmavo classes LA .. NUhU. Each rule: the input must parse as a cmavo
; (lookahead), one listed form is consumed, and post-word must follow.
; In LA and LE the bare two-letter forms ("l a", "l o", "l e") are listed
; last, after every longer form that begins with them.
LA <- &cmavo ( l a i / l a h i / l a ) &post-word

LAU <- &cmavo ( c e h a / l a u / z a i / t a u ) &post-word

LAhE <- &cmavo ( t u h a / l u h a / l u h o / l a h e / v u h i / l u h i / l u h e ) &post-word

LE <- &cmavo ( l e i / l o i / l e h i / l o h i / l e h e / l o h e / l o / l e ) &post-word

LEhU <- &cmavo ( l e h u ) &post-word

LI <- &cmavo ( m e h o / l i ) &post-word

LIhU <- &cmavo ( l i h u ) &post-word

LOhO <- &cmavo ( l o h o ) &post-word

LOhU <- &cmavo ( l o h u ) &post-word

LU <- &cmavo ( l u ) &post-word

LUhU <- &cmavo ( l u h u ) &post-word

MAhO <- &cmavo ( m a h o ) &post-word

MAI <- &cmavo ( m o h o / m a i ) &post-word

ME <- &cmavo ( m e ) &post-word

MEhU <- &cmavo ( m e h u ) &post-word

MOhE <- &cmavo ( m o h e ) &post-word

MOhI <- &cmavo ( m o h i ) &post-word

MOI <- &cmavo ( m e i / m o i / s i h e / c u h o / v a h e ) &post-word

NA <- &cmavo ( j a h a / n a ) &post-word

NAI <- &cmavo ( n a i ) &post-word

NAhE <- &cmavo ( t o h e / j e h a / n a h e / n o h e ) &post-word

NAhU <- &cmavo ( n a h u ) &post-word

NIhE <- &cmavo ( n i h e ) &post-word

NIhO <- &cmavo ( n i h o / n o h i ) &post-word

NOI <- &cmavo ( v o i / n o i / p o i ) &post-word

NU <- &cmavo ( n i / d u h u / s i h o / n u / l i h i / k a / j e i / s u h u / z u h o / m u h e / p u h u / z a h i ) &post-word

NUhA <- &cmavo ( n u h a ) &post-word

NUhI <- &cmavo ( n u h i ) &post-word

NUhU <- &cmavo ( n u h u ) &post-word

; cmavo classes PA .. TUhU. Each rule: the input must parse as a cmavo
; (lookahead), one listed form is consumed, and post-word must follow.
; PA is the only class whose list ends with a non-literal alternative: the
; `digit` rule, so a single ASCII digit is also accepted as a PA.
PA <- &cmavo ( d a u / f e i / g a i / j a u / r e i / v a i / p i h e / p i /  f i h u / z a h u / m e h i / n i h u / k i h o / c e h i / m a h u / r a h e / d a h a / s o h a / j i h i / s u h o / s u h e / r o / r a u / s o h u / s o h i / s o h e / s o h o / m o h a / d u h e / t e h o / k a h o / c i h i / t u h o / x o / p a i / n o h o / n o / p a / r e / c i / v o / m u / x a / z e / b i / s o / digit ) &post-word

PEhE <- &cmavo ( p e h e ) &post-word

PEhO <- &cmavo ( p e h o ) &post-word

PU <- &cmavo ( b a / p u / c a ) &post-word

RAhO <- &cmavo ( r a h o ) &post-word

ROI <- &cmavo ( r e h u / r o i ) &post-word

SA <- &cmavo ( s a ) &post-word

SE <- &cmavo ( s e / t e / v e / x e ) &post-word

SEI <- &cmavo ( s e i / t i h o ) &post-word

SEhU <- &cmavo ( s e h u ) &post-word

SI <- &cmavo ( s i ) &post-word

SOI <- &cmavo ( s o i ) &post-word

SU <- &cmavo ( s u ) &post-word

TAhE <- &cmavo ( r u h i / t a h e / d i h i / n a h o ) &post-word

TEhU <- &cmavo ( t e h u ) &post-word

TEI <- &cmavo ( t e i ) &post-word

; "t o h i" is listed before its prefix "t o" ("/" is ordered choice).
TO <- &cmavo ( t o h i / t o ) &post-word

TOI <- &cmavo ( t o i ) &post-word

TUhE <- &cmavo ( t u h e ) &post-word

TUhU <- &cmavo ( t u h u ) &post-word

; cmavo classes UI .. ZOhU. Each rule: the input must parse as a cmavo
; (lookahead), one listed form is consumed, and post-word must follow.
UI <- &cmavo ( i h a / i e / a h e / u h i / i h o / i h e / a h a / i a / o h i / o h e / e h e / o i / u o / e h i / u h o / a u /  u a / a h i / i h u / i i / u h a / u i / a h o / a i / a h u / i u / e i / o h o / e h a / u u / o h a / o h u / u h u / e h o / i  o / e h u / u e / i h i / u h e / b a h a / j a h o / c a h e / s u h a / t i h e / k a h u / s e h o / z a h a / p e h i / r u h a  / j u h a / t a h o / r a h u / l i h a / b a h u / m u h a / d o h a / t o h u / v a h i / p a h e / z u h u / s a h e / l a h a /  k e h u / s a h u / d a h i / j e h u / s a h a / k a u / t a h u / n a h i / j o h a / b i h u / l i h o / p a u / m i h u / k u h  i / j i h a / s i h a / p o h o / p e h a / r o h i / r o h e / r o h o / r o h u / r o h a / r e h e / l e h o / j u h o / f u h i  / d a i / g a h i / z o h o / b e h u / r i h e / s e h i / s e h a / v u h e / k i h a / x u / g e h e / b u h o ) &post-word

VA <- &cmavo ( v i / v a / v u ) &post-word

VAU <- &cmavo ( v a u ) &post-word

VEI <- &cmavo ( v e i ) &post-word

VEhO <- &cmavo ( v e h o ) &post-word

VUhU <- &cmavo ( g e h a / f u h u / p i h i / f e h i / v u h u / s u h i / j u h u / g e i / p a h i / f a h i / t e h a / c u h a  / v a h a / n e h o / d e h o / f e h a / s a h o / r e h a / r i h o / s a h i / p i h a / s i h i ) &post-word

VEhA <- &cmavo ( v e h u / v e h a / v e h i / v e h e ) &post-word

VIhA <- &cmavo ( v i h i / v i h a / v i h u / v i h e ) &post-word

VUhO <- &cmavo ( v u h o ) &post-word

XI <- &cmavo ( x i ) &post-word

; Y is the only class whose form is a repetition: one or more y letters.
Y <- &cmavo ( y+ ) &post-word

ZAhO <- &cmavo ( c o h i / p u h o / c o h u / m o h u / c a h o / c o h a / d e h a / b a h o / d i h a / z a h o ) &post-word

ZEhA <- &cmavo ( z e h u / z e h a / z e h i / z e h e ) &post-word

ZEI <- &cmavo ( z e i ) &post-word

ZI <- &cmavo ( z u / z a / z i ) &post-word

ZIhE <- &cmavo ( z i h e ) &post-word

ZO <- &cmavo ( z o ) &post-word

ZOI <- &cmavo ( z o i / l a h o ) &post-word

ZOhU <- &cmavo ( z o h u ) &post-word