前回の続きです。今回はコンピュータシステムの理論と実装(以下、nand2tetris本)の10章のコンパイラ#1:構文解析をC言語で実装してみました。
今回のコード
下記、タグv0.0.3
になります。
下記で動かせます。
git clone -b v0.0.3 https://github.com/nihemak/nand2tetris.git cd nand2tetris # download nand2tetris environment ./setup.sh # test all ./test.sh
概要
今回はコンパイラの構文解析部分です。実装は書籍にしたがって2段階で行いました。
.jackファイル
または.jackファイル群
を入力として受け取りそれぞれに対応する字句解析結果であるT.xmlファイル
を生成するコマンドを実装- 構文解析結果である
.xmlファイル
を生成するコマンドに改造
トークナイザ
ここでは字句解析を行い下記のトークンに分割しました。
トークン | 概要 |
---|---|
keyword | class , method , function , constructor , int , boolean , char , void , var , static , field , let , do , if , else , while , return , true , false , null , this |
symbol | { , } , ( , ) , [ , ] , . , , , ; , + , - , * , / , & , | , < , > , = , ~ |
identifier | 数字以外から始まるアルファベット、数字、アンダースコアの文字列 |
integerConstant | 0から32767 |
stringConstant | ダブルクォートで囲まれた文字列 |
下記で使えます。
cp -r ./nand2tetris/projects/10/Square 10/JackAnalyzer/ && \ mkdir -p 10/JackAnalyzer/Square/expect && \ mv 10/JackAnalyzer/Square/*.xml 10/JackAnalyzer/Square/expect/ # ...(省略)... cd 10/JackAnalyzer/ clang --std=c11 -Wall -Wextra -o JackAnalyzer main.c JackTokenizer.c JackTokenizerPrivate.c ./JackAnalyzer Square
main.c (JackAnalyzerモジュール)
コマンドのエントリポイントです。書籍ではJackAnalyzerモジュールと呼ばれています。
main.c
ではコマンド引数の解析、JackTokenizerモジュールを用いた字句解析結果であるT.xmlファイル
への変換処理を行います。main.c
の中で使う関数はmain.c内で下記のように定義しました。
10/JackAnalyzer/main.c:L10-L23
int analyzeByJackDir(DIR *dpJack, char *jackDirName); int analyzeByJackFile(char *jackFileName); int analyze(char *xmlFilePath, char *jackFilePath); bool isJackFileName(char *jackFileName); void createJackFilePath(char *jackDirName, char *jackFileName, char *jackFilePath); void createXmlFilePathFromDirName(char *jackDirName, char *jackFileName, char *xmlFilePath); void createXmlFilePathFromJackFileName(char *jackFileName, char *xmlFilePath); void writeTokens(FILE *fp, JackTokenizer tokenizer); void writeToken(FILE *fp, JackTokenizer tokenizer); void writeKeyword(FILE *fp, JackTokenizer tokenizer); void writeSymbol(FILE *fp, JackTokenizer tokenizer); void writeIdentifier(FILE *fp, JackTokenizer tokenizer); void writeIntegerConstant(FILE *fp, JackTokenizer tokenizer); void writeStringConstant(FILE *fp, JackTokenizer tokenizer);
コマンド引数には.jackファイル
またはjackファイルを複数含むディレクトリ
にいづれかを指定できます。関数はanalyzeByJackDir
およびanalyzeByJackFile
です。この処理は前回のバーチャルマシンでの実装とほぼ同じです。ただし出力が1ファイルのバーチャルマシンと違いこちらは.jack
に対して一対一に対応する.xml
を出力します。
変換処理の実装は下記の通りです。JackTokenizerモジュールを使用して.xml
を作成します。
10/JackAnalyzer/main.c:L127-L150
int analyze(char *xmlFilePath, char *jackFilePath) { FILE *fpJack, *fpXml; JackTokenizer tokenizer; if ((fpJack = fopen(jackFilePath, "r")) == NULL) { fprintf(stderr, "Error: jack file not found (%s)\n", jackFilePath); return 1; } if ((fpXml = fopen(xmlFilePath, "w")) == NULL) { fprintf(stderr, "Error: xml file not open (%s)\n", xmlFilePath); fclose(fpJack); return 1; } tokenizer = JackTokenizer_init(fpJack); writeTokens(fpXml, tokenizer); fclose(fpXml); fclose(fpJack); return 0; }
.xml
へのタグの書き込みは下記の通りです。.xml
なので<
, >
, &
はそれぞれ<
, >
, &"
に変換しました。
10/JackAnalyzer/main.c:L197-L309
void writeTokens(FILE *fp, JackTokenizer tokenizer) { fprintf(fp, "<tokens>\n"); while (JackTokenizer_hasMoreTokens(tokenizer)) { JackTokenizer_advance(tokenizer); writeToken(fp, tokenizer); } fprintf(fp, "</tokens>\n"); } void writeToken(FILE *fp, JackTokenizer tokenizer) { switch (JackTokenizer_tokenType(tokenizer)) { case JACK_TOKENIZER_TOKEN_TYPE_KEYWORD: writeKeyword(fp, tokenizer); break; case JACK_TOKENIZER_TOKEN_TYPE_SYMBOL: writeSymbol(fp, tokenizer); break; case JACK_TOKENIZER_TOKEN_TYPE_IDENTIFIER: writeIdentifier(fp, tokenizer); break; case JACK_TOKENIZER_TOKEN_TYPE_INT_CONST: writeIntegerConstant(fp, tokenizer); break; case JACK_TOKENIZER_TOKEN_TYPE_STRING_CONST: writeStringConstant(fp, tokenizer); break; default: break; } } void writeKeyword(FILE *fp, JackTokenizer tokenizer) { struct keyword { JackTokenizer_Keyword id; char *string; }; struct keyword keywords[] = { { JACK_TOKENIZER_KEYWORD_CLASS, "class" }, { JACK_TOKENIZER_KEYWORD_METHOD, "method" }, { JACK_TOKENIZER_KEYWORD_FUNCTION, "function" }, { JACK_TOKENIZER_KEYWORD_CONSTRUCTION, "constructor" }, { JACK_TOKENIZER_KEYWORD_INT, "int" }, { JACK_TOKENIZER_KEYWORD_BOOLEAN, "boolean" }, { JACK_TOKENIZER_KEYWORD_CHAR, "char" }, { JACK_TOKENIZER_KEYWORD_VOID, "void" }, { JACK_TOKENIZER_KEYWORD_VAR, "var" }, { JACK_TOKENIZER_KEYWORD_STATIC, "static" }, { JACK_TOKENIZER_KEYWORD_FIELD, "field" }, { JACK_TOKENIZER_KEYWORD_LET, "let" }, { JACK_TOKENIZER_KEYWORD_DO, "do" }, { JACK_TOKENIZER_KEYWORD_IF, "if" }, { JACK_TOKENIZER_KEYWORD_ELSE, "else" }, { JACK_TOKENIZER_KEYWORD_WHILE, "while" }, { JACK_TOKENIZER_KEYWORD_RETURN, "return" }, { JACK_TOKENIZER_KEYWORD_TRUE, "true" }, { JACK_TOKENIZER_KEYWORD_FALSE, "false" }, { JACK_TOKENIZER_KEYWORD_NULL, "null" }, { JACK_TOKENIZER_KEYWORD_THIS, "this" }, }; JackTokenizer_Keyword id = JackTokenizer_keyword(tokenizer); fprintf(fp, "<keyword> "); for (size_t i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) { if (id == keywords[i].id) { fprintf(fp, "%s", keywords[i].string); break; } } fprintf(fp, " </keyword>\n"); } void writeSymbol(FILE *fp, JackTokenizer tokenizer) { char token[JACK_TOKEN_SIZE]; JackTokenizer_symbol(tokenizer, token); fprintf(fp, "<symbol> "); if (strcmp(token, "<") == 0) { fprintf(fp, "<"); } else if (strcmp(token, ">") == 0) { fprintf(fp, ">"); } else if (strcmp(token, "&") == 0) { fprintf(fp, "&"); } else { fprintf(fp, "%s", token); } fprintf(fp, " </symbol>\n"); } void writeIdentifier(FILE *fp, JackTokenizer tokenizer) { char token[JACK_TOKEN_SIZE]; JackTokenizer_identifier(tokenizer, token); fprintf(fp, "<identifier> %s </identifier>\n", token); } void writeIntegerConstant(FILE *fp, JackTokenizer tokenizer) { int intVal; JackTokenizer_intVal(tokenizer, &intVal); fprintf(fp, "<integerConstant> %d </integerConstant>\n", intVal); } void writeStringConstant(FILE *fp, JackTokenizer tokenizer) { char token[JACK_TOKEN_SIZE]; JackTokenizer_stringVal(tokenizer, token); fprintf(fp, "<stringConstant> %s </stringConstant>\n", token); }
JackTokenizerモジュール
.jackファイル
を字句解析するためのモジュールです。
main.c
で利用する関数はJackTokenizer.h
で下記のように定義しました。jack_tokenizer構造体はtypedefして定義はJackTokenizer.c
内に隠蔽するようにしてオブジェクトとして使うようにしました。それぞれの実装はJackTokenizer.c
で行いました。
10/JackAnalyzer/JackTokenizer.h:L9-L53
typedef enum { JACK_TOKENIZER_TOKEN_TYPE_KEYWORD = 1, JACK_TOKENIZER_TOKEN_TYPE_SYMBOL, JACK_TOKENIZER_TOKEN_TYPE_IDENTIFIER, JACK_TOKENIZER_TOKEN_TYPE_INT_CONST, JACK_TOKENIZER_TOKEN_TYPE_STRING_CONST, JACK_TOKENIZER_TOKEN_TYPE_STRING_UNKNOWN } JackTokenizer_TokenType; typedef enum { JACK_TOKENIZER_KEYWORD_CLASS = 1, JACK_TOKENIZER_KEYWORD_METHOD, JACK_TOKENIZER_KEYWORD_FUNCTION, JACK_TOKENIZER_KEYWORD_CONSTRUCTION, JACK_TOKENIZER_KEYWORD_INT, JACK_TOKENIZER_KEYWORD_BOOLEAN, JACK_TOKENIZER_KEYWORD_CHAR, JACK_TOKENIZER_KEYWORD_VOID, JACK_TOKENIZER_KEYWORD_VAR, JACK_TOKENIZER_KEYWORD_STATIC, JACK_TOKENIZER_KEYWORD_FIELD, JACK_TOKENIZER_KEYWORD_LET, JACK_TOKENIZER_KEYWORD_DO, JACK_TOKENIZER_KEYWORD_IF, JACK_TOKENIZER_KEYWORD_ELSE, JACK_TOKENIZER_KEYWORD_WHILE, JACK_TOKENIZER_KEYWORD_RETURN, JACK_TOKENIZER_KEYWORD_TRUE, JACK_TOKENIZER_KEYWORD_FALSE, JACK_TOKENIZER_KEYWORD_NULL, JACK_TOKENIZER_KEYWORD_THIS, JACK_TOKENIZER_KEYWORD_UNKNOWN } JackTokenizer_Keyword; typedef struct jack_tokenizer * JackTokenizer; JackTokenizer JackTokenizer_init(FILE *fpJack); bool JackTokenizer_hasMoreTokens(JackTokenizer thisObject); void JackTokenizer_advance(JackTokenizer thisObject); JackTokenizer_TokenType JackTokenizer_tokenType(JackTokenizer thisObject); JackTokenizer_Keyword JackTokenizer_keyword(JackTokenizer thisObject); void JackTokenizer_symbol(JackTokenizer thisObject, char *symbol); void JackTokenizer_identifier(JackTokenizer thisObject, char *identifier); void JackTokenizer_intVal(JackTokenizer thisObject, int *intVal); void JackTokenizer_stringVal(JackTokenizer thisObject, char *stringVal);
またJackTokenizer.c
内で使う関数はJackTokenizerPrivate.h
で下記のように定義しJackTokenizerPrivate.c
で実装しました。
10/JackAnalyzer/JackTokenizerPrivate.h:L7-L12
void moveNextToken(FILE *fp); bool isEndOfFile(FILE *fp); bool getTokenSymbol(FILE *fp, char *token); bool getTokenStringConstant(FILE *fp, char *token); bool getTokenIntConstant(FILE *fp, char *token); bool getTokenIdentifierOrKeyword(FILE *fp, char *token);
現在のトークンの種類やキーワードの場合のキーワードの種類の判断はJackTokenizer_advance
関数で行いました。
10/JackAnalyzer/JackTokenizer.c:L36-L56
void JackTokenizer_advance(JackTokenizer thisObject) { thisObject->keyword = JACK_TOKENIZER_KEYWORD_UNKNOWN; if (getTokenSymbol(thisObject->fpJack, thisObject->token)) { thisObject->type = JACK_TOKENIZER_TOKEN_TYPE_SYMBOL; } else if (getTokenStringConstant(thisObject->fpJack, thisObject->token)) { thisObject->type = JACK_TOKENIZER_TOKEN_TYPE_STRING_CONST; } else if (getTokenIntConstant(thisObject->fpJack, thisObject->token)) { thisObject->type = JACK_TOKENIZER_TOKEN_TYPE_INT_CONST; } else { getTokenIdentifierOrKeyword(thisObject->fpJack, thisObject->token); thisObject->keyword = convertIdentifierToKeyword(thisObject->token); if (thisObject->keyword == JACK_TOKENIZER_KEYWORD_UNKNOWN) { thisObject->type = JACK_TOKENIZER_TOKEN_TYPE_IDENTIFIER; } else { thisObject->type = JACK_TOKENIZER_TOKEN_TYPE_KEYWORD; } } moveNextToken(thisObject->fpJack); }
キーワードの種類の文字列をJackTokenizer_Keyword
定数に変換する処理は下記の通りです。
10/JackAnalyzer/JackTokenizer.c:L96-L131
JackTokenizer_Keyword convertIdentifierToKeyword(char *token) { struct keyword { JackTokenizer_Keyword id; char *string; }; struct keyword keywords[] = { { JACK_TOKENIZER_KEYWORD_CLASS, "class" }, { JACK_TOKENIZER_KEYWORD_METHOD, "method" }, { JACK_TOKENIZER_KEYWORD_FUNCTION, "function" }, { JACK_TOKENIZER_KEYWORD_CONSTRUCTION, "constructor" }, { JACK_TOKENIZER_KEYWORD_INT, "int" }, { JACK_TOKENIZER_KEYWORD_BOOLEAN, "boolean" }, { JACK_TOKENIZER_KEYWORD_CHAR, "char" }, { JACK_TOKENIZER_KEYWORD_VOID, "void" }, { JACK_TOKENIZER_KEYWORD_VAR, "var" }, { JACK_TOKENIZER_KEYWORD_STATIC, "static" }, { JACK_TOKENIZER_KEYWORD_FIELD, "field" }, { JACK_TOKENIZER_KEYWORD_LET, "let" }, { JACK_TOKENIZER_KEYWORD_DO, "do" }, { JACK_TOKENIZER_KEYWORD_IF, "if" }, { JACK_TOKENIZER_KEYWORD_ELSE, "else" }, { JACK_TOKENIZER_KEYWORD_WHILE, "while" }, { JACK_TOKENIZER_KEYWORD_RETURN, "return" }, { JACK_TOKENIZER_KEYWORD_TRUE, "true" }, { JACK_TOKENIZER_KEYWORD_FALSE, "false" }, { JACK_TOKENIZER_KEYWORD_NULL, "null" }, { JACK_TOKENIZER_KEYWORD_THIS, "this" }, }; for (size_t i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) { if (strcmp(token, keywords[i].string) == 0) { return keywords[i].id; } } return JACK_TOKENIZER_KEYWORD_UNKNOWN; }
それぞれ実装の詳細はソースコードを参照。
パーサ
ここでは下記の通りJack言語の文法に従い構文解析処理の実装を行いました。各実装はCompilationEngineモジュールを参照。
- CompilationEngine_compileClass関数
- class:
'class' className '{' classVarDec* subroutineDec* '}'
- class:
- CompilationEngine_compileClassVarDec関数
- classVarDec:
('static' | 'field') type varName (',' varName)* ';'
- classVarDec:
- CompilationEngine_compileSubroutine関数
- subroutineDec:
('constructor' | 'function' | 'method') ('void' | type) subroutineName '(' parameterList ')' subroutineBody
- subroutineBody:
'{' varDec* statements '}'
- subroutineDec:
- CompilationEngine_compileParameterList関数
- parameterList:
((type varName) (',' type varName)*)?
- parameterList:
- CompilationEngine_compileVarDec関数
- varDec:
'var' type varName (',' varName)* ';'
- varDec:
- CompilationEngine_compileStatements関数
- statements:
statement*
- statement:
letStatement | ifStatement | whileStatement | doStatement | returnStatement
- statements:
- CompilationEngine_compileDo関数
- doStatement:
'do' subroutineCall ';'
- subroutineCall:
subroutineName '(' expressionList ')' | (className | varName) '.' subroutineName '(' expressionList ')'
- doStatement:
- CompilationEngine_compileLet関数
- letStatement:
'let' varName ('[' expression ']')? '=' expression ';'
- letStatement:
- CompilationEngine_compileWhile関数
- whileStatement:
'while' '(' expression ')' '{' statements '}'
- whileStatement:
- CompilationEngine_compileReturn関数
- returnStatement:
'return' expression? ';'
- returnStatement:
- CompilationEngine_compileIf関数
- ifStatement:
'if' '(' expression ')' '{' statements '}' ('else' '{' statements '}')?
- ifStatement:
- CompilationEngine_compileExpression関数
- expression:
term (op term)*
- op:
'+' | '-' | '*' | '/' | '&' | '|' | '<' | '>' | '='
- expression:
- CompilationEngine_compileTerm関数
- term:
integerConstant | stringConstant | keywordConstant | varName | varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term
- subroutineCall:
subroutineName '(' expressionList ')' | (className | varName) '.' subroutineName '(' expressionList ')'
- unaryOp:
'-' | '~'
- term:
- CompilationEngine_compileExpressionList関数
- expressionList:
(expression (',' expression)*)?
- expressionList:
式を含まない版
ここではJack言語の文法のうちCompilationEngine_compileTerm
以外について実装しました。
下記で使えます。
cp -r ./nand2tetris/projects/10/ExpressionLessSquare 10/JackAnalyzer2/ && \ mkdir -p 10/JackAnalyzer2/ExpressionLessSquare/expect && \ mv 10/JackAnalyzer2/ExpressionLessSquare/*.xml 10/JackAnalyzer2/ExpressionLessSquare/expect/ cd 10/JackAnalyzer2/ clang --std=c11 -Wall -Wextra -o JackAnalyzer main.c JackTokenizer.c JackTokenizerPrivate.c CompilationEngine.c ./JackAnalyzer ExpressionLessSquare
main.c (JackAnalyzerモジュール)
下記の変更を行いました。
- 生成する
.xmlファイル
の名前をXxxT.xml
からXxx.xml
に変更 - analyze関数をCompilationEngineモジュールを用いるように変更
- writeXXX関数を削除(CompilationEngineモジュールへ移動)
差分は下記の通りです。
$ diff 10/JackAnalyzer/main.c 10/JackAnalyzer2/main.c 1c1 < #include "JackTokenizer.h" --- > #include "CompilationEngine.h" 2a3 > #include <stdbool.h> 7c8 < #define XML_FILENAME_MAX_LENGTH JACK_FILENAME_MAX_LENGTH // length('.jack') - length('T.xml') = 0 --- > #define XML_FILENAME_MAX_LENGTH (JACK_FILENAME_MAX_LENGTH - 1) // length('.jack') - length('.xml') = 1 17,23d17 < void writeTokens(FILE *fp, JackTokenizer tokenizer); < void writeToken(FILE *fp, JackTokenizer tokenizer); < void writeKeyword(FILE *fp, JackTokenizer tokenizer); < void writeSymbol(FILE *fp, JackTokenizer tokenizer); < void writeIdentifier(FILE *fp, JackTokenizer tokenizer); < void writeIntegerConstant(FILE *fp, JackTokenizer tokenizer); < void writeStringConstant(FILE *fp, JackTokenizer tokenizer); 130c124 < JackTokenizer tokenizer; --- > CompilationEngine compilationEngine; 143,144c137,138 < tokenizer = JackTokenizer_init(fpJack); < writeTokens(fpXml, tokenizer); --- > compilationEngine = CompilationEngine_init(fpJack, fpXml); > CompilationEngine_compileClass(compilationEngine); 180c174 < // xmlFilePath is {jackDirName}/{jackFileName} - ".jack" + "T.xml" --- > // xmlFilePath is {jackDirName}/{jackFileName} - ".jack" + ".xml" 184c178 < strcat(xmlFilePath, "T.xml"); --- > strcat(xmlFilePath, ".xml"); 189c183 < // XmlFilePath is {jackFileName} - ".jack" + "T.xml" --- > // XmlFilePath is {jackFileName} - ".jack" + ".xml" 194,308c188 < strcat(xmlFilePath, "T.xml"); < } < < void writeTokens(FILE *fp, JackTokenizer tokenizer) < { < fprintf(fp, "<tokens>\n"); < while (JackTokenizer_hasMoreTokens(tokenizer)) { < JackTokenizer_advance(tokenizer); < writeToken(fp, tokenizer); < } < fprintf(fp, "</tokens>\n"); < } < < void writeToken(FILE *fp, JackTokenizer tokenizer) < { < switch (JackTokenizer_tokenType(tokenizer)) < { < case JACK_TOKENIZER_TOKEN_TYPE_KEYWORD: < writeKeyword(fp, tokenizer); < break; < case JACK_TOKENIZER_TOKEN_TYPE_SYMBOL: < writeSymbol(fp, tokenizer); < break; < case JACK_TOKENIZER_TOKEN_TYPE_IDENTIFIER: < writeIdentifier(fp, tokenizer); < break; < case JACK_TOKENIZER_TOKEN_TYPE_INT_CONST: < writeIntegerConstant(fp, tokenizer); < break; < case JACK_TOKENIZER_TOKEN_TYPE_STRING_CONST: < writeStringConstant(fp, tokenizer); < break; < default: < break; < } < } < < void writeKeyword(FILE *fp, JackTokenizer tokenizer) < { < struct keyword { < JackTokenizer_Keyword id; < char *string; < }; < struct keyword keywords[] = { < { JACK_TOKENIZER_KEYWORD_CLASS, "class" }, < { JACK_TOKENIZER_KEYWORD_METHOD, "method" }, < { JACK_TOKENIZER_KEYWORD_FUNCTION, "function" }, < { JACK_TOKENIZER_KEYWORD_CONSTRUCTION, "constructor" }, < { JACK_TOKENIZER_KEYWORD_INT, "int" }, < { JACK_TOKENIZER_KEYWORD_BOOLEAN, "boolean" }, < { JACK_TOKENIZER_KEYWORD_CHAR, "char" }, < { JACK_TOKENIZER_KEYWORD_VOID, "void" }, < { JACK_TOKENIZER_KEYWORD_VAR, "var" }, < { JACK_TOKENIZER_KEYWORD_STATIC, "static" }, < { JACK_TOKENIZER_KEYWORD_FIELD, "field" }, < { JACK_TOKENIZER_KEYWORD_LET, "let" }, < { JACK_TOKENIZER_KEYWORD_DO, "do" }, < { JACK_TOKENIZER_KEYWORD_IF, "if" }, < { JACK_TOKENIZER_KEYWORD_ELSE, "else" }, < { JACK_TOKENIZER_KEYWORD_WHILE, "while" }, < { JACK_TOKENIZER_KEYWORD_RETURN, "return" }, < { JACK_TOKENIZER_KEYWORD_TRUE, "true" }, < { JACK_TOKENIZER_KEYWORD_FALSE, "false" }, < { JACK_TOKENIZER_KEYWORD_NULL, "null" }, < { JACK_TOKENIZER_KEYWORD_THIS, "this" }, < }; < JackTokenizer_Keyword id = JackTokenizer_keyword(tokenizer); < < fprintf(fp, "<keyword> "); < for (size_t i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) { < if (id == keywords[i].id) { < fprintf(fp, "%s", keywords[i].string); < break; < } < } < fprintf(fp, " </keyword>\n"); < } < < void writeSymbol(FILE *fp, JackTokenizer tokenizer) < { < char token[JACK_TOKEN_SIZE]; < JackTokenizer_symbol(tokenizer, token); < < fprintf(fp, "<symbol> "); < if (strcmp(token, "<") == 0) { < fprintf(fp, "<"); < } else if (strcmp(token, ">") == 0) { < fprintf(fp, ">"); < } else if (strcmp(token, "&") == 0) { < fprintf(fp, "&"); < } else { < fprintf(fp, "%s", token); < } < fprintf(fp, " </symbol>\n"); < } < < void writeIdentifier(FILE *fp, JackTokenizer tokenizer) < { < char token[JACK_TOKEN_SIZE]; < JackTokenizer_identifier(tokenizer, token); < fprintf(fp, "<identifier> %s </identifier>\n", token); < } < < void writeIntegerConstant(FILE *fp, JackTokenizer tokenizer) < { < int intVal; < JackTokenizer_intVal(tokenizer, &intVal); < fprintf(fp, "<integerConstant> %d </integerConstant>\n", intVal); < } < < void writeStringConstant(FILE *fp, JackTokenizer tokenizer) < { < char token[JACK_TOKEN_SIZE]; < JackTokenizer_stringVal(tokenizer, token); < fprintf(fp, "<stringConstant> %s </stringConstant>\n", token); --- > strcat(xmlFilePath, ".xml");
JackTokenizerモジュール
変更はありません。
$ diff 10/JackAnalyzer/JackTokenizer.h 10/JackAnalyzer2/JackTokenizer.h $ diff 10/JackAnalyzer/JackTokenizer.c 10/JackAnalyzer2/JackTokenizer.c $ diff 10/JackAnalyzer/JackTokenizerPrivate.h 10/JackAnalyzer2/JackTokenizerPrivate.h $ diff 10/JackAnalyzer/JackTokenizerPrivate.c 10/JackAnalyzer2/JackTokenizerPrivate.c
CompilationEngineモジュール
.jackファイル
を構文解析して.xmlファイル
に出力するためのモジュールです。
main.c
で利用する関数はCompilationEngine.h
で下記のように定義しました。compilation_engine構造体はtypedefして定義はCompilationEngine.c
内に隠蔽するようにしてオブジェクトとして使うようにしました。それぞれの実装はCompilationEngine.c
で行いました。
10/JackAnalyzer2/CompilationEngine.h:L6-L22
typedef struct compilation_engine * CompilationEngine; CompilationEngine CompilationEngine_init(FILE *fpJack, FILE *fpXml); void CompilationEngine_compileClass(CompilationEngine thisObject); void CompilationEngine_compileClassVarDec(CompilationEngine thisObject); void CompilationEngine_compileSubroutine(CompilationEngine thisObject); void CompilationEngine_compileParameterList(CompilationEngine thisObject); void CompilationEngine_compileVarDec(CompilationEngine thisObject); void CompilationEngine_compileStatements(CompilationEngine thisObject); void CompilationEngine_compileDo(CompilationEngine thisObject); void CompilationEngine_compileLet(CompilationEngine thisObject); void CompilationEngine_compileWhile(CompilationEngine thisObject); void CompilationEngine_compileReturn(CompilationEngine thisObject); void CompilationEngine_compileIf(CompilationEngine thisObject); void CompilationEngine_compileExpression(CompilationEngine thisObject); void CompilationEngine_compileTerm(CompilationEngine thisObject); void CompilationEngine_compileExpressionList(CompilationEngine thisObject);
またCompilationEngine.c
内で使う関数は下記の通りです。writeXXX
関数はmain.cから移動してきました。
10/JackAnalyzer2/CompilationEngine.c:L6-L15
void writeToken(FILE *fp, JackTokenizer tokenizer); void writeKeyword(FILE *fp, JackTokenizer tokenizer); void writeSymbol(FILE *fp, JackTokenizer tokenizer); void writeIdentifier(FILE *fp, JackTokenizer tokenizer); void writeIntegerConstant(FILE *fp, JackTokenizer tokenizer); void writeStringConstant(FILE *fp, JackTokenizer tokenizer); void advanceAndWriteToken(CompilationEngine thisObject); bool isKeywordToken(CompilationEngine thisObject, JackTokenizer_Keyword keyword); bool isSymbolToken(CompilationEngine thisObject, char *symbol); bool inSymbolListToken(CompilationEngine thisObject, ...);
Jack言語の文法にしたがって構文解析して.xmlファイル
に出力する実装は下記の通りです。ほぼLL(1)文法であるため割とシンプルです。なお、CompilationEngine_compileTerm
だけは次節で実装するためきちんとした実装にはなっていません。
10/JackAnalyzer2/CompilationEngine.c:L34-L359
// 'class' className '{' classVarDec* subroutineDec* '}' void CompilationEngine_compileClass(CompilationEngine thisObject) { fprintf(thisObject->fpXml, "<class>\n"); advanceAndWriteToken(thisObject); // 'class' advanceAndWriteToken(thisObject); // className advanceAndWriteToken(thisObject); // '{' JackTokenizer_advance(thisObject->tokenizer); // '}' or not while (! isSymbolToken(thisObject, "}")) { if ( isKeywordToken(thisObject, JACK_TOKENIZER_KEYWORD_STATIC) || isKeywordToken(thisObject, JACK_TOKENIZER_KEYWORD_FIELD) ) { // classVarDec CompilationEngine_compileClassVarDec(thisObject); } else if ( isKeywordToken(thisObject, JACK_TOKENIZER_KEYWORD_CONSTRUCTION) || isKeywordToken(thisObject, JACK_TOKENIZER_KEYWORD_FUNCTION) || isKeywordToken(thisObject, JACK_TOKENIZER_KEYWORD_METHOD) ) { // subroutineDec CompilationEngine_compileSubroutine(thisObject); } else { break; } } writeToken(thisObject->fpXml ,thisObject->tokenizer); // '}' fprintf(thisObject->fpXml, "</class>\n"); } // ('static' | 'field') type varName (',' varName)* ';' void CompilationEngine_compileClassVarDec(CompilationEngine thisObject) { fprintf(thisObject->fpXml, "<classVarDec>\n"); writeToken(thisObject->fpXml ,thisObject->tokenizer); // ('static' | 'field') advanceAndWriteToken(thisObject); // type do { advanceAndWriteToken(thisObject); // varName advanceAndWriteToken(thisObject); // ',' or ';' } while (! isSymbolToken(thisObject, ";")); fprintf(thisObject->fpXml, "</classVarDec>\n"); JackTokenizer_advance(thisObject->tokenizer); } // ('constructor' | 'function' | 'method') ('void' | type) subroutineName '(' parameterList ')' subroutineBody // subroutineBody: '{' varDec* statements '}' void CompilationEngine_compileSubroutine(CompilationEngine thisObject) { fprintf(thisObject->fpXml, "<subroutineDec>\n"); writeToken(thisObject->fpXml ,thisObject->tokenizer); // ('constructor' | 'function' | 'method') advanceAndWriteToken(thisObject); // ('void' | type) advanceAndWriteToken(thisObject); // subroutineName advanceAndWriteToken(thisObject); // '(' JackTokenizer_advance(thisObject->tokenizer); CompilationEngine_compileParameterList(thisObject); writeToken(thisObject->fpXml ,thisObject->tokenizer); // ')' fprintf(thisObject->fpXml, "<subroutineBody>\n"); advanceAndWriteToken(thisObject); // '{' JackTokenizer_advance(thisObject->tokenizer); // 'var' or statements or '}' while (isKeywordToken(thisObject, JACK_TOKENIZER_KEYWORD_VAR)) { CompilationEngine_compileVarDec(thisObject); } if (! isSymbolToken(thisObject, "}")) { // statements or '}' CompilationEngine_compileStatements(thisObject); } writeToken(thisObject->fpXml, thisObject->tokenizer); // '}' fprintf(thisObject->fpXml, "</subroutineBody>\n"); fprintf(thisObject->fpXml, "</subroutineDec>\n"); JackTokenizer_advance(thisObject->tokenizer); } // ((type varName) (',' type varName)*)? void CompilationEngine_compileParameterList(CompilationEngine thisObject) { fprintf(thisObject->fpXml, "<parameterList>\n"); if (JackTokenizer_tokenType(thisObject->tokenizer) == JACK_TOKENIZER_TOKEN_TYPE_KEYWORD) { // type writeToken(thisObject->fpXml ,thisObject->tokenizer); // type advanceAndWriteToken(thisObject); // varName JackTokenizer_advance(thisObject->tokenizer); // ',' or not while (isSymbolToken(thisObject, ",")) { writeToken(thisObject->fpXml ,thisObject->tokenizer); // ',' advanceAndWriteToken(thisObject); // type advanceAndWriteToken(thisObject); // varName JackTokenizer_advance(thisObject->tokenizer); // ',' or not } } fprintf(thisObject->fpXml, "</parameterList>\n"); } // 'var' type varName (',' varName)* ';' void CompilationEngine_compileVarDec(CompilationEngine thisObject) { fprintf(thisObject->fpXml, "<varDec>\n"); writeToken(thisObject->fpXml, thisObject->tokenizer); // 'var' advanceAndWriteToken(thisObject); // type do { advanceAndWriteToken(thisObject); // varName advanceAndWriteToken(thisObject); // ',' or ';' } while (! isSymbolToken(thisObject, ";")); fprintf(thisObject->fpXml, "</varDec>\n"); JackTokenizer_advance(thisObject->tokenizer); } // statement* // statement: letStatement | ifStatement | whileStatement | doStatement | returnStatement void CompilationEngine_compileStatements(CompilationEngine thisObject) { fprintf(thisObject->fpXml, "<statements>\n"); do { if (isKeywordToken(thisObject, JACK_TOKENIZER_KEYWORD_LET)) { CompilationEngine_compileLet(thisObject); } else if (isKeywordToken(thisObject, JACK_TOKENIZER_KEYWORD_IF)) { CompilationEngine_compileIf(thisObject); } else if (isKeywordToken(thisObject, JACK_TOKENIZER_KEYWORD_WHILE)) { CompilationEngine_compileWhile(thisObject); } else if (isKeywordToken(thisObject, JACK_TOKENIZER_KEYWORD_DO)) { CompilationEngine_compileDo(thisObject); } else if (isKeywordToken(thisObject, JACK_TOKENIZER_KEYWORD_RETURN)) { CompilationEngine_compileReturn(thisObject); } else { break; } } while (true); fprintf(thisObject->fpXml, "</statements>\n"); } // 'do' subroutineCall ';' // subroutineCall: subroutineName '(' expressionList ')' | (className | varName) '.' subroutineName '(' expressionList ')' void CompilationEngine_compileDo(CompilationEngine thisObject) { fprintf(thisObject->fpXml, "<doStatement>\n"); writeToken(thisObject->fpXml ,thisObject->tokenizer); // 'do' advanceAndWriteToken(thisObject); // subroutineName | (className | varName) advanceAndWriteToken(thisObject); // '(' or '.' if (isSymbolToken(thisObject, ".")) { advanceAndWriteToken(thisObject); // subroutineName advanceAndWriteToken(thisObject); // '(' } JackTokenizer_advance(thisObject->tokenizer); CompilationEngine_compileExpressionList(thisObject); writeToken(thisObject->fpXml ,thisObject->tokenizer); // ')' advanceAndWriteToken(thisObject); // ';' fprintf(thisObject->fpXml, "</doStatement>\n"); JackTokenizer_advance(thisObject->tokenizer); } // 'let' varName ('[' expression ']')? '=' expression ';' void CompilationEngine_compileLet(CompilationEngine thisObject) { fprintf(thisObject->fpXml, "<letStatement>\n"); writeToken(thisObject->fpXml ,thisObject->tokenizer); // 'let' advanceAndWriteToken(thisObject); // varName advanceAndWriteToken(thisObject); // '[' or '=' if (isSymbolToken(thisObject, "[")) { JackTokenizer_advance(thisObject->tokenizer); CompilationEngine_compileExpression(thisObject); writeToken(thisObject->fpXml ,thisObject->tokenizer); // ']' advanceAndWriteToken(thisObject); // '=' } JackTokenizer_advance(thisObject->tokenizer); CompilationEngine_compileExpression(thisObject); writeToken(thisObject->fpXml ,thisObject->tokenizer); // ';' fprintf(thisObject->fpXml, "</letStatement>\n"); JackTokenizer_advance(thisObject->tokenizer); } // 'while' '(' expression ')' '{' statements '}' void CompilationEngine_compileWhile(CompilationEngine thisObject) { fprintf(thisObject->fpXml, "<whileStatement>\n"); writeToken(thisObject->fpXml ,thisObject->tokenizer); // 'while' advanceAndWriteToken(thisObject); // '(' JackTokenizer_advance(thisObject->tokenizer); CompilationEngine_compileExpression(thisObject); writeToken(thisObject->fpXml ,thisObject->tokenizer); // ')' advanceAndWriteToken(thisObject); // '{' JackTokenizer_advance(thisObject->tokenizer); CompilationEngine_compileStatements(thisObject); writeToken(thisObject->fpXml ,thisObject->tokenizer); // '}' fprintf(thisObject->fpXml, "</whileStatement>\n"); JackTokenizer_advance(thisObject->tokenizer); } // 'return' expression? ';' void CompilationEngine_compileReturn(CompilationEngine thisObject) { fprintf(thisObject->fpXml, "<returnStatement>\n"); writeToken(thisObject->fpXml ,thisObject->tokenizer); // 'return' JackTokenizer_advance(thisObject->tokenizer); // expression or ';' if (! isSymbolToken(thisObject, ";")) { CompilationEngine_compileExpression(thisObject); } writeToken(thisObject->fpXml ,thisObject->tokenizer); // ';' fprintf(thisObject->fpXml, "</returnStatement>\n"); JackTokenizer_advance(thisObject->tokenizer); } // 'if' '(' expression ')' '{' statements '}' ('else' '{' statements '}')? void CompilationEngine_compileIf(CompilationEngine thisObject) { fprintf(thisObject->fpXml, "<ifStatement>\n"); writeToken(thisObject->fpXml ,thisObject->tokenizer); // 'if' advanceAndWriteToken(thisObject); // '(' JackTokenizer_advance(thisObject->tokenizer); CompilationEngine_compileExpression(thisObject); writeToken(thisObject->fpXml ,thisObject->tokenizer); // ')' advanceAndWriteToken(thisObject); // '{' JackTokenizer_advance(thisObject->tokenizer); CompilationEngine_compileStatements(thisObject); writeToken(thisObject->fpXml ,thisObject->tokenizer); // '}' JackTokenizer_advance(thisObject->tokenizer); // 'else' or not if (isKeywordToken(thisObject, JACK_TOKENIZER_KEYWORD_ELSE)) { writeToken(thisObject->fpXml ,thisObject->tokenizer); // 'else' advanceAndWriteToken(thisObject); // '{' JackTokenizer_advance(thisObject->tokenizer); CompilationEngine_compileStatements(thisObject); writeToken(thisObject->fpXml ,thisObject->tokenizer); // '}' JackTokenizer_advance(thisObject->tokenizer); } fprintf(thisObject->fpXml, "</ifStatement>\n"); } // term (op term)* // op: '+' | '-' | '*' | '/' | '&' | '|' | '<' | '>' | '=' void CompilationEngine_compileExpression(CompilationEngine thisObject) { fprintf(thisObject->fpXml, "<expression>\n"); CompilationEngine_compileTerm(thisObject); while (inSymbolListToken(thisObject, "+", "-", "*", "/", "&", "|", "<", ">", "=", NULL)) { writeToken(thisObject->fpXml ,thisObject->tokenizer); // op JackTokenizer_advance(thisObject->tokenizer); CompilationEngine_compileTerm(thisObject); } fprintf(thisObject->fpXml, "</expression>\n"); } // integerConstant | stringConstant | keywordConstant | varName | varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term void CompilationEngine_compileTerm(CompilationEngine thisObject) { fprintf(thisObject->fpXml, "<term>\n"); writeToken(thisObject->fpXml ,thisObject->tokenizer); // term fprintf(thisObject->fpXml, "</term>\n"); JackTokenizer_advance(thisObject->tokenizer); } // (expression (',' expression)*)? void CompilationEngine_compileExpressionList(CompilationEngine thisObject) { fprintf(thisObject->fpXml, "<expressionList>\n"); if (! isSymbolToken(thisObject, ")")) { // expression is not ')' CompilationEngine_compileExpression(thisObject); while (isSymbolToken(thisObject, ",")) { writeToken(thisObject->fpXml ,thisObject->tokenizer); // ',' JackTokenizer_advance(thisObject->tokenizer); // expression CompilationEngine_compileExpression(thisObject); } } fprintf(thisObject->fpXml, "</expressionList>\n"); }
それぞれ実装の詳細はソースコードを参照。
完全版
ここではJack言語の文法のCompilationEngine_compileTerm
を実装しました。
下記で使えます。
cp -r ./nand2tetris/projects/10/Square 10/JackAnalyzer3/ && \ mkdir -p 10/JackAnalyzer3/Square/expect && \ mv 10/JackAnalyzer3/Square/*.xml 10/JackAnalyzer3/Square/expect/ # ...(省略)... cd 10/JackAnalyzer3/ clang --std=c11 -Wall -Wextra -o JackAnalyzer main.c JackTokenizer.c JackTokenizerPrivate.c CompilationEngine.c ./JackAnalyzer Square
main.c (JackAnalyzerモジュール)
変更はありません。
$ diff 10/JackAnalyzer2/main.c 10/JackAnalyzer3/main.c
JackTokenizerモジュール
変更はありません。
$ diff 10/JackAnalyzer2/JackTokenizer.h 10/JackAnalyzer3/JackTokenizer.h $ diff 10/JackAnalyzer2/JackTokenizer.c 10/JackAnalyzer3/JackTokenizer.c $ diff 10/JackAnalyzer2/JackTokenizerPrivate.h 10/JackAnalyzer3/JackTokenizerPrivate.h $ diff 10/JackAnalyzer2/JackTokenizerPrivate.c 10/JackAnalyzer3/JackTokenizerPrivate.c
CompilationEngineモジュール
CompilationEngine_compileTerm
関数を実装しました。
10/JackAnalyzer3/CompilationEngine.c:L330-L388
// integerConstant | stringConstant | keywordConstant | varName | varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term // subroutineCall: subroutineName '(' expressionList ')' | (className | varName) '.' subroutineName '(' expressionList ')' // unaryOp: '-' | '~' void CompilationEngine_compileTerm(CompilationEngine thisObject) { fprintf(thisObject->fpXml, "<term>\n"); if ( JackTokenizer_tokenType(thisObject->tokenizer) == JACK_TOKENIZER_TOKEN_TYPE_INT_CONST || JackTokenizer_tokenType(thisObject->tokenizer) == JACK_TOKENIZER_TOKEN_TYPE_STRING_CONST || JackTokenizer_tokenType(thisObject->tokenizer) == JACK_TOKENIZER_TOKEN_TYPE_KEYWORD ) { // integerConstant | stringConstant | keywordConstant writeToken(thisObject->fpXml ,thisObject->tokenizer); // integerConstant or stringConstant or keywordConstant JackTokenizer_advance(thisObject->tokenizer); } else if (isSymbolToken(thisObject, "(")) { // '(' expression ')' writeToken(thisObject->fpXml ,thisObject->tokenizer); // '(' JackTokenizer_advance(thisObject->tokenizer); CompilationEngine_compileExpression(thisObject); writeToken(thisObject->fpXml ,thisObject->tokenizer); // ')' JackTokenizer_advance(thisObject->tokenizer); } else if (inSymbolListToken(thisObject, "-", "~", NULL)) { // unaryOp term writeToken(thisObject->fpXml ,thisObject->tokenizer); // unaryOp JackTokenizer_advance(thisObject->tokenizer); CompilationEngine_compileTerm(thisObject); } else { // varName | varName '[' expression ']' | subroutineCall writeToken(thisObject->fpXml ,thisObject->tokenizer); // varName | subroutineName | className JackTokenizer_advance(thisObject->tokenizer); // '[' or '(' or '.' or not if (isSymbolToken(thisObject, "[")) { writeToken(thisObject->fpXml ,thisObject->tokenizer); JackTokenizer_advance(thisObject->tokenizer); CompilationEngine_compileExpression(thisObject); writeToken(thisObject->fpXml ,thisObject->tokenizer); // ']' JackTokenizer_advance(thisObject->tokenizer); } else if (inSymbolListToken(thisObject, "(", ".", NULL)) { writeToken(thisObject->fpXml ,thisObject->tokenizer); // '(' or '.' if (isSymbolToken(thisObject, ".")) { advanceAndWriteToken(thisObject); // subroutineName advanceAndWriteToken(thisObject); // '(' } JackTokenizer_advance(thisObject->tokenizer); CompilationEngine_compileExpressionList(thisObject); writeToken(thisObject->fpXml ,thisObject->tokenizer); // ')' JackTokenizer_advance(thisObject->tokenizer); } } fprintf(thisObject->fpXml, "</term>\n"); }
まとめ
今回はJack言語の文法にしたがった構文解析器を実装しました。11章は今回の構文解析器を改造してコード生成を行えるようにしてコンパイラを完成させるようです。11章にはシンボルテーブルのテストが用意されていなかったりテストプログラムが目視テストだったりと大変そうですが時間があれば挑んでみたいです。