编译器设计 实践

LEX

LEX 最简分词 示例, 类似cat命令

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
LEX 代码
chunli@ubuntu:~/lab/yacc/lex$ cat 001.l
%%
.|\n ECHO; /* copy every input character, newline included, to the output unchanged */
%%
chunli@ubuntu:~/lab/yacc/lex$



编译 && 运行
chunli@ubuntu:~/lab/yacc/lex$ lex 001.l
chunli@ubuntu:~/lab/yacc/lex$ gcc lex.yy.c -ll && ./a.out
3213213
3213213
412312
412312
534534
534534
52423
52423

^C
chunli@ubuntu:~/lab/yacc/lex$

LEX 分词器 识别动词方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
LEX 代码
chunli@ubuntu:~/lab/yacc/lex$ cat 001.l
%{
/*
 * Demo: classify each input word as a verb or a non-verb.
 *
 * In the rules below, a rule whose action is `|` shares the action of the
 * rule that follows it, so the whole keyword list ends up using the single
 * printf attached to the last keyword.
 */
%}

%%
[ \t]+ ; /* skip whitespace */

is |
am |
are |
were |
was |
be |
being |
been |
do |
does |
did |
will |
would |
should |
could |
can |
has |
have |
had |
go {printf("%s is verb\n", yytext);}
[a-zA-Z]+ {printf("%s is not verb\n", yytext);}

.|\n ECHO;
%%
chunli@ubuntu:~/lab/yacc/lex$

编译 && 运行
chunli@ubuntu:~/lab/yacc/lex$ lex 001.l
chunli@ubuntu:~/lab/yacc/lex$ gcc lex.yy.c -ll && ./a.out
are you ok ?
are is verb
you is not verb
ok is not verb
?


did I have fun ?
did is verb
I is not verb
have is verb
fun is not verb
?


chunli@ubuntu:~/lab/yacc/lex$

添加更多词性

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
chunli@ubuntu:~/lab/yacc/lex$ cat 001.l
%{
/*
 * Demo: classify each input word as a verb, adverb, preposition,
 * conjunction or pronoun; any other alphabetic word is reported by the
 * final word rule.
 *
 * A rule whose action is `|` shares the action of the rule that follows
 * it, so each keyword list uses the single printf attached to its last
 * keyword.
 */
%}

%%
[ \t]+ ; /* skip whitespace */

is |
am |
are |
were |
was |
be |
being |
been |
do |
does |
did |
will |
would |
should |
could |
can |
has |
have |
had |
go {printf("%s is verb\n", yytext);}

very |
simply |
gently |
quietly |
calmly |
angrily {printf("%s is adverb\n", yytext);}

to |
from |
behind |
below |
between |
above {printf("%s is preposition\n", yytext);}

if |
then |
and |
but |
or {printf("%s is conjunction\n", yytext);}

I |
you |
he |
she |
we |
they {printf("%s is pronoun\n", yytext);}

[a-zA-Z]+ {printf("%s is not verb\n", yytext);}

.|\n ECHO;
%%
chunli@ubuntu:~/lab/yacc/lex$


编译 && 运行
chunli@ubuntu:~/lab/yacc/lex$ lex 001.l
chunli@ubuntu:~/lab/yacc/lex$ gcc lex.yy.c -l l && ./a.out
did you if very nice?
did is verb
you is pronoun
if is conjunction
very is adverb
nice is not verb
?






if I have simply idea
if is conjunction
I is pronoun
have is verb
simply is adverb
idea is not verb

^C
chunli@ubuntu:~/lab/yacc/lex$

一种自动识别 单词词性的分词器

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
LEX 程序代码
chunli@ubuntu:~/lab/yacc/lex$ cat 001.l
%{
/*
* 带符号表的单词识别程序
*/


/* Scanner states; the non-zero ones double as the type code stored per word. */
enum
{
    LOOKUP = 0, /* default: look words up instead of defining them */
    VERB = 1,
    ADJ = 2,
    ADV = 3,
    NOUN = 4,
    PREP = 5,
    PRON = 6,
    CONJ = 7
};

/* Current scanner state; starts out as LOOKUP. */
int state = LOOKUP;

/* Symbol-table helpers used by the scanner actions. */
int add_word(int type, char *word);
int lookup_word(char *word);

%}

%%

\n { state = LOOKUP; } /* end of line: fall back to the default lookup state */

 /* A line that begins with a part-of-speech keyword switches the scanner */
 /* into a "define" state: the words that follow on that line are stored  */
 /* with that type until the newline resets the state.                    */

^verb { state = VERB; }
^adj { state = ADJ; }
^adv { state = ADV; }
^noun { state = NOUN; }
^prep { state = PREP; }
^pron { state = PRON; }
^conj { state = CONJ; }


[a-zA-Z]+ {
    /* An ordinary word: define it when in a define state, otherwise look it up. */
    if (state != LOOKUP) {
        /* Record the current word under the active type. */
        add_word(state, yytext);
    } else {
        switch (lookup_word(yytext)) {
        case VERB: printf("[%s] is verb\n", yytext); break;
        case ADJ:  printf("[%s] is adj\n", yytext); break;
        case ADV:  printf("[%s] is adv\n", yytext); break;
        case NOUN: printf("[%s] is noun\n", yytext); break;
        case PREP: printf("[%s] is prep\n", yytext); break;
        case PRON: printf("[%s] is pron\n", yytext); break;
        case CONJ: printf("[%s] is conj\n", yytext); break;
        default:   printf("[%s] don't recognize\n", yytext); break;
        }
    }
}

. ; /* ignore every other character */

%%

/*
 * Program entry point: hand control to the generated scanner.
 * None of the rules return from yylex(), so it comes back only once the
 * input is exhausted; the program then exits with status 0.
 */
int main()
{
    yylex();
    return 0;
}

/* Singly linked list node that records one defined word and its type. */
struct word
{
char *word_name; /* the word's text (heap copy made by add_word) */
int word_type; /* type code passed to add_word when the word was defined */
struct word *next; /* next node in the list */

};

/* Head of the word list; add_word() inserts new entries at the front. */
struct word *word_list;

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Add `word` to the front of the word list with type code `type`.
 *
 * The text is copied, so the caller's buffer may be reused afterwards.
 * Returns 1 when the word was added. Returns 0 when the word is already
 * defined or when memory cannot be allocated; a message is printed in
 * both of those cases and the list is left unchanged.
 */
int add_word(int type, char *word)
{
    struct word *wp;
    size_t len;

    if (lookup_word(word) != LOOKUP) {
        printf("warn! already define word [%s]\n", word);
        return 0;
    }

    wp = malloc(sizeof *wp);
    if (wp == NULL) {
        fprintf(stderr, "error! out of memory while defining word [%s]\n", word);
        return 0;
    }

    len = strlen(word) + 1; /* include the terminating NUL */
    wp->word_name = malloc(len);
    if (wp->word_name == NULL) {
        fprintf(stderr, "error! out of memory while defining word [%s]\n", word);
        free(wp); /* do not leak the half-built node */
        return 0;
    }
    memcpy(wp->word_name, word, len);

    wp->word_type = type;
    wp->next = word_list;
    word_list = wp;
    return 1; /* the word has been recorded */
}



/*
 * Search the word list for `word`.
 *
 * Returns the stored type of the first entry whose name compares equal,
 * or LOOKUP when no entry matches.
 */
int lookup_word(char *word)
{
    struct word *node = word_list;

    while (node) {
        if (!strcmp(node->word_name, word)) {
            /* found it */
            return node->word_type;
        }
        node = node->next;
    }

    /* reached the end of the list without a match */
    return LOOKUP;
}
chunli@ubuntu:~/lab/yacc/lex$






编译 && 运行
chunli@ubuntu:~/lab/yacc/lex$ lex 001.l && gcc lex.yy.c -l l && ./a.out
pron I you he she
verb is am are was be does
prep to from
conj if then and but or

I is boy
[I] is pron
[is] is verb
[boy] don't recognize

are you ok?
[are] is verb
[you] is pron
[ok] don't recognize

LEX & YACC