少々強引に、動くようにしてみたり。

/と*と-が、lexer辺りでうまく処理されてなかったので、
そいつだけ独立したトークンにしつつ、Parserでまとめる感じに。
空白文字列がgrammarに山ほど出てきてしまうのは、もうちょっと先で対応するつもり。

// Combined lexer/parser grammar named TwoWaySQL. The rules below recognise text
// interleaved with BEGIN / IF / ELSE / END directives written inside
// "/* ... */" and "-- ..." comments.
grammar TwoWaySQL;

options {
	language = Java;           // target language of the generated recognizers
	output = AST;              // parser rules build trees
	ASTLabelType = CommonTree; // tree node type used in the generated code
}

// Imaginary token types: no lexer rule produces them; they only appear as
// node labels in the tree rewrites (-> ^(...)) of the parser rules.
tokens {
	BEGINNODE;       // root built by begincomment
	IFNODE;          // root built by ifcomment
	EXPRESSIONNODE;  // wraps the condition in ifcomment / elsecomment
	ELSENODE;        // wraps each else branch inside ifcomment's tree
}

// Package declaration emitted at the top of the generated parser class.
@parser::header {
package twowaysql.grammar;
}

// Same package for the generated lexer class.
@lexer::header {
package twowaysql.grammar;
}

// Entry rule: the whole input is a single txt followed by end of input.
twowaySQL
	:	txt EOF
	;

// One or more items, each being a comment (directive or ordinary), a character
// run, a standalone whitespace token or a line terminator.
txt
	:	( comment
		| charactors
		| WS
		| LT
		)+
	;

// One or more IDENT tokens or standalone '*', '/', '-' symbol tokens.
// The symbols are separate tokens in the lexer and are regrouped here.
charactors
	:	( IDENT
		| SYM_A
		| SYM_S
		| SYM_H
		)+
	;

// $<comment

// Any of the four comment forms: BEGIN directive, IF directive,
// ordinary block comment, ordinary line comment.
comment
	:	begincomment
	|	ifcomment
	|	blockcomment
	|	linecomment
	;


// Ordinary "/* ... */" comment: at least one character run or line terminator
// between the delimiters.
blockcomment
	:	C_ST
		( charactors
		| LT
		)+
		C_ED
	;

// Ordinary "--" comment: the marker, one character run, then a line terminator
// or end of input.
// charactors is itself a (...)+ loop, so it is referenced exactly once here;
// a second + around it would accept nothing new and would only make the
// loop-exit decision ambiguous for the parser generator.
linecomment
	:	C_LN charactors ( LT | EOF )
	;

// IF directive: "/* IF <expression> */", the text that follows, zero or more
// (elsecomment, text) pairs, and a closing END directive.
// Rewritten to ^(IFNODE ^(EXPRESSIONNODE expression) txt ^(ELSENODE elsecomment txt)*).
ifcomment
	:	C_ST IF expression C_ED
		txt
		( elsecomment txt )*
		endcomment
		-> ^( IFNODE
		      ^( EXPRESSIONNODE expression )
		      txt
		      ^( ELSENODE elsecomment txt )*
		    )
	;

// ELSE directive in either comment style, with an optional condition.
// Both forms are rewritten to an EXPRESSIONNODE holding the condition, if any.
elsecomment
	:	C_ST ELSE expression? C_ED
		-> ^( EXPRESSIONNODE expression? )
	|	C_LN ELSE lineexpression? LT
		-> ^( EXPRESSIONNODE lineexpression? )
	;

// Condition text inside a block-comment directive; it may span lines, so
// whitespace tokens and line terminators are accepted alongside character runs.
expression
	:	( charactors
		| WS
		| LT
		)+
	;
	
// Condition text inside a line-comment directive: a single character run.
// charactors is itself a (...)+ loop, so it is referenced exactly once here;
// a second + around it would accept nothing new and would only make the
// loop-exit decision ambiguous for the parser generator.
lineexpression
	:	charactors
	;

// BEGIN directive ("/* BEGIN */" or "-- BEGIN" ended by a line terminator),
// the enclosed text, and a closing END directive.
// Rewritten to ^(BEGINNODE txt); the directive tokens themselves are dropped.
begincomment
	:	( C_ST BEGIN C_ED
		| C_LN BEGIN LT
		)
		txt
		endcomment
		-> ^( BEGINNODE txt )
	;

// END directive: "/* END */", or "-- END" terminated by a line terminator
// or end of input.
endcomment
	:	C_ST END C_ED
	|	C_LN END ( LT | EOF )
	;

// $>
// Comment delimiters.
C_ST : '/*' ;   // block comment start
C_ED : '*/' ;   // block comment end
C_LN : '--' ;   // line comment marker

// The characters that make up the comment delimiters, as standalone tokens.
// The parser rule charactors accepts them next to IDENT.
SYM_A : '*' ;
SYM_S : '/' ;
SYM_H : '-' ;

// Directive keywords. Each letter is matched in either case, and any
// whitespace characters directly before or after the word are part of the token.
BEGIN
	:	WS* ('b'|'B') ('e'|'E') ('g'|'G') ('i'|'I') ('n'|'N') WS*
	;

IF
	:	WS* ('i'|'I') ('f'|'F') WS*
	;

ELSE
	:	WS* ('e'|'E') ('l'|'L') ('s'|'S') ('e'|'E') WS*
	;

END
	:	WS* ('e'|'E') ('n'|'N') ('d'|'D') WS*
	;

// $<Charactors
// Line terminator: CR LF, LF alone, or CR alone.
LT
	:	'\r\n'
	|	'\n'
	|	'\r'
	;
// A single whitespace character other than a line terminator: tab, vertical tab,
// form feed, space or no-break space.
// Vertical tab is spelled '\u000B' because '\v' is not one of the character
// escapes ANTLR defines (\n \r \t \b \f \" \' \\ \uXXXX), so it would not
// denote U+000B.
WS	: '\t' | '\u000B' | '\f' | ' ' | '\u00A0';

// Earlier definition, kept for reference:
//IDENT :	(~(C_ST|C_ED|C_LN))+;

// A run of one or more CHAR characters; whitespace characters directly before
// or after the run are part of the token.
IDENT
	:	WS* CHAR+ WS*
	;

// Any single character outside the sets named by the symbol, line-terminator
// and whitespace rules. Declared as a fragment, so it is only a building block
// for other lexer rules (IDENT) and is not emitted as a token of its own.
// NOTE(review): LT also contains the two-character literal '\r\n'; confirm the
// tool builds the complement set from it as intended.
fragment CHAR	: ~(SYM_A | SYM_S | SYM_H | LT | WS);
// $>

こんな感じのテキストを食わせると、そこそこASTになる感じ。

ada.cec のしのし / 3 d/*aa hoge
 piro*/  ぎゃぼー
-- BeGiN
 /* If aa - bb
 dd*/moge piro /*eLsE ccc + 44*/cccd
	-- elSE fuga < 0
		dfdze
	-- ElSe 
	 zzz
 -- eNd
	-- moge うぼぁ zz
/* EnD*/