/*
 * Assignment: Tiny C Parser
 * Student ID: 24412014
 * Name: Tomoya Kitamura
 * Date: 2026/01/30
 */
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * =========================================================
 * Content from distributed define_token.h
 * =========================================================
 */
#define START_TOKEN 0
#define EOD 1
#define UNKNOWN 2

#define NAME 10
#define NUMERIC 11
#define BOOLEAN_VAL 12

#define REV_TRUE 21
#define REV_FALSE 22
#define REV_PERCENT_D 23
#define REV_PERCENT_T 24

#define DEC_INT 31
#define REV_MAIN 32
#define REV_IF 33
#define REV_ELSE 34
#define REV_WHILE 35
#define SCANF 36
#define PRINTF 37

#define L_BRACKET 51 // {
#define R_BRACKET 52 // }
#define COMMA 53     // ,
#define SEMICOLON 54 // ;
#define EQUAL 55     // =
#define PLUS 56      // +
#define MINUS 57     // -
#define ESUAS 58     // !
#define ASTERRISK 59 // *
#define SLASH 60     // /
#define L_PAREN 61   // (
#define R_PAREN 62   // )

#define LOGICAL_AND 81
#define LOGICAL_OR 82
#define GREATER 83
#define GREATER_EQUAL 84
#define LESS 85
#define LESS_EQUAL 86
#define RQUAL 87 // ==
#define NOT_EQUAL 88
#define AMPERSAND 89

// The three format tokens must stay consecutive and in the same order as
// formatter[] below: the lexer computes the token as PERCENT_D + index.
#define PERCENT_D 101
#define PERCENT_DN 102
#define PERCENT_DT 103

#define BLANK ' '

// Structure for reserved words
struct rev_word_typ {
    int token_num;
    char rev_word[16];
};

// Definition of reserved words
struct rev_word_typ rev_word[] = {
    {REV_TRUE, "true"},
    {REV_FALSE, "false"},
    {DEC_INT, "int"},
    {REV_MAIN, "main"},
    {REV_IF, "if"},
    {REV_ELSE, "else"},
    {REV_WHILE, "while"},
    {SCANF, "scanf"},
    {PRINTF, "printf"}
};

// Accepted format strings as they appear in the analysed source text
// (backslash followed by a letter, not an actual newline/tab character).
char *formatter[] = {"%d", "%d\\n", "%d\\t"};

// Structure for token object
struct token_obj_typ {
    char id[256]; // identifier text (valid when sym == NAME)
    int num;      // numeric value (valid when sym == NUMERIC)
    int sym;      // token code
};

// Current token
struct token_obj_typ current_token;

// Variables for position tracking
int line_count = 0;
int column_count = 0; // Added: Column number

/*
 * =========================================================
 * Symbol Table Implementation
 * =========================================================
 */
#define MAX_VARS 100
#define MAX_VAR_LEN 16

char symbol_table[MAX_VARS][MAX_VAR_LEN];
int symbol_count = 0;

void error_exit(const char *msg); // Forward declaration

// Add variable to symbol table (Check Duplicate).
// Terminates the program through error_exit() when the name is already
// registered or the table is full.
void add_symbol(const char *name)
{
    for (int i = 0; i < symbol_count; i++) {
        if (strcmp(symbol_table[i], name) == 0) {
            error_exit("Duplicate variable definition");
        }
    }
    if (symbol_count >= MAX_VARS) {
        error_exit("Too many variables");
    }
    // Bounded copy that always NUL-terminates
    snprintf(symbol_table[symbol_count], MAX_VAR_LEN, "%s", name);
    symbol_count++;
}

// Check if variable exists (Check Undefined).
// Returns normally when the name is registered; otherwise terminates the
// program through error_exit().
void check_symbol_exists(const char *name)
{
    for (int i = 0; i < symbol_count; i++) {
        if (strcmp(symbol_table[i], name) == 0) {
            return; // Found
        }
    }
    error_exit("Undefined variable used");
}

/*
 * =========================================================
 * Lexical Analysis Part (getsym)
 * =========================================================
 */
#define ONE_LINE_LENGTH 256
#define REV_WORD_LENGTH 9
#define EOL (-1)
#define NULL_CHARACTER '\0'
#define BLANK_CHARACTER ' '
#define SLASH_CHARACTER '/'

// Function to identify reserved words.
// Returns the token code of the reserved word equal to p, or 0 when p is
// not a reserved word.
int identify_rev_word(const char *p)
{
    for (int i = 0; i < REV_WORD_LENGTH; i++) {
        if (strcmp(p, rev_word[i].rev_word) == 0) {
            return rev_word[i].token_num;
        }
    }
    return 0;
}

// Consume one character, plus a following `second` when present.
// Returns pair_sym for the two-character form, single_sym otherwise.
static int one_or_two_chars(char **ptr, char second, int pair_sym, int single_sym)
{
    (*ptr)++;
    if (**ptr == second) {
        (*ptr)++;
        return pair_sym;
    }
    return single_sym;
}

// Map the text between two double quotes to a format token.
// The whole text must equal one formatter[] entry; a mere prefix (for
// example an empty string or "%") is rejected as UNKNOWN.
static int classify_format(const char *text, size_t len)
{
    size_t count = sizeof formatter / sizeof formatter[0];

    for (size_t i = 0; i < count; i++) {
        if (strlen(formatter[i]) == len && memcmp(text, formatter[i], len) == 0) {
            return PERCENT_D + (int)i;
        }
    }
    return UNKNOWN;
}

// Function to identify symbols.
// Reads the symbol starting at *ptr, advances *ptr past it and returns its
// token code. Unrecognised characters are consumed and reported as UNKNOWN.
// *ptr is never moved beyond the terminating NUL of the line.
int identify_symbol(char **ptr)
{
    int sm = UNKNOWN;

    switch (**ptr) {
    case '{': sm = L_BRACKET; (*ptr)++; break;
    case '}': sm = R_BRACKET; (*ptr)++; break;
    case '(': sm = L_PAREN;   (*ptr)++; break;
    case ')': sm = R_PAREN;   (*ptr)++; break;
    case ',': sm = COMMA;     (*ptr)++; break;
    case ';': sm = SEMICOLON; (*ptr)++; break;
    case '+': sm = PLUS;      (*ptr)++; break;
    case '-': sm = MINUS;     (*ptr)++; break;
    case '*': sm = ASTERRISK; (*ptr)++; break;
    case '/': sm = SLASH;     (*ptr)++; break;
    case '=': sm = one_or_two_chars(ptr, '=', RQUAL, EQUAL); break;
    case '&': sm = one_or_two_chars(ptr, '&', LOGICAL_AND, AMPERSAND); break;
    case '|': sm = one_or_two_chars(ptr, '|', LOGICAL_OR, UNKNOWN); break;
    case '>': sm = one_or_two_chars(ptr, '=', GREATER_EQUAL, GREATER); break;
    case '<': sm = one_or_two_chars(ptr, '=', LESS_EQUAL, LESS); break;
    case '!': sm = one_or_two_chars(ptr, '=', NOT_EQUAL, ESUAS); break;
    case '"': {
        const char *start;

        (*ptr)++;
        start = *ptr;
        while (**ptr != '"' && **ptr != NULL_CHARACTER) {
            (*ptr)++;
        }
        if (**ptr == NULL_CHARACTER) {
            // Unterminated string: stay on the NUL so the caller does not
            // read past the end of the line buffer.
            break;
        }
        sm = classify_format(start, (size_t)(*ptr - start));
        (*ptr)++; // closing quote
        break;
    }
    case NULL_CHARACTER:
        break; // end of line: nothing to consume
    default:
        (*ptr)++; // consume the unknown character so the lexer progresses
        break;
    }
    return sm;
}

// Set values to token object.
// Always stores ssym in current_token.sym; additionally stores the
// identifier text for NAME and the decimal value for NUMERIC.
// Terminates through error_exit() when a numeric literal does not fit in int.
void set_token_obj(const char *buffer, int ssym)
{
    current_token.sym = ssym;

    if (ssym == NAME) {
        snprintf(current_token.id, sizeof current_token.id, "%s", buffer);
    } else if (ssym == NUMERIC) {
        long value;

        errno = 0;
        value = strtol(buffer, NULL, 10);
        if (errno == ERANGE || value > INT_MAX) {
            error_exit("Numeric constant out of range");
        }
        current_token.num = (int)value;
    }
}

// Function to read one line from standard input.
// The newline is stored as a blank, tabs and carriage returns are stored as
// blanks, and the buffer is always NUL-terminated (also for a final line
// without a newline). buffer must hold ONE_LINE_LENGTH bytes.
// Returns the number of characters stored, or EOL at end of input.
// Terminates through error_exit() when a line does not fit in the buffer.
int get_line(char *buffer)
{
    int ch;
    int counter = 0;

    while ((ch = getchar()) != EOF) {
        if (ch == '\n') {
            buffer[counter++] = BLANK_CHARACTER;
            break;
        }
        // Keep room for the trailing blank and the terminating NUL.
        if (counter >= ONE_LINE_LENGTH - 2) {
            line_count++;
            column_count = counter + 1;
            error_exit("Line too long");
        }
        if (ch == '\t' || ch == '\r') {
            ch = BLANK_CHARACTER;
        }
        buffer[counter++] = (char)ch;
    }
    buffer[counter] = NULL_CHARACTER;

    if (counter == 0) {
        return EOL;
    }
    // Count line number (including a last line that has no newline)
    line_count++;
    return counter;
}

// Main function to get token (getsym).
// Reads the next token from standard input, stores it in current_token and
// returns its token code. At end of input both the return value and
// current_token.sym are EOD. Lines that are blank or hold only a `//`
// comment are skipped. column_count is set to the 1-based column of the
// token's first character.
int getsym(void)
{
    static char one_line[ONE_LINE_LENGTH];
    static char *ptr = one_line; // ptr == one_line means "line exhausted"
    char buffer[ONE_LINE_LENGTH];
    char *buffer_p = buffer;
    int ssym;
    int res = 0;

    buffer[0] = NULL_CHARACTER;

    // If buffer is empty, read new line
    while (ptr == one_line && res == 0) {
        res = get_line(one_line);
        if (res == EOL) {
            // Publish EOD in current_token too: the parser tests
            // current_token.sym, not the return value.
            ssym = EOD;
            set_token_obj(buffer, ssym);
            return ssym; // End of file
        }
        // Skip leading blanks and a whole-line comment
        while (*ptr == BLANK_CHARACTER) {
            ptr++;
        }
        if (*ptr == SLASH_CHARACTER && *(ptr + 1) == SLASH_CHARACTER) {
            *ptr = NULL_CHARACTER;
        }
        if (*ptr == NULL_CHARACTER) {
            ptr = one_line;
            res = 0;
        }
    }

    // Capture column position (1-based index)
    // ptr points to the start of the current token
    column_count = (int)(ptr - one_line) + 1;

    // <ctype.h> functions require a value representable as unsigned char
    if (isalpha((unsigned char)*ptr)) {
        // Alpha: variable or reserved word
        while (isalnum((unsigned char)*ptr)) {
            *buffer_p++ = *ptr++;
        }
        *buffer_p = NULL_CHARACTER;

        ssym = identify_rev_word(buffer);
        if (ssym == 0) {
            ssym = NAME;
            // Check variable length (Max 15 chars)
            if (strlen(buffer) > MAX_VAR_LEN - 1) {
                error_exit("Variable name too long (Max 15)");
            }
        }
        set_token_obj(buffer, ssym);
    } else if (isdigit((unsigned char)*ptr)) {
        // Digit: number
        while (isdigit((unsigned char)*ptr)) {
            *buffer_p++ = *ptr++;
        }
        *buffer_p = NULL_CHARACTER;

        ssym = NUMERIC;
        set_token_obj(buffer, ssym);
    } else {
        // Others: symbols
        ssym = identify_symbol(&ptr);
        set_token_obj(buffer, ssym);
    }

    // Skip trailing blanks; mark the line exhausted at its end or at a comment
    while (*ptr == BLANK_CHARACTER) {
        ptr++;
    }
    if (*ptr == NULL_CHARACTER ||
        (*ptr == SLASH_CHARACTER && *(ptr + 1) == SLASH_CHARACTER)) {
        ptr = one_line;
    }

    return ssym;
}

/*
 * =========================================================
 * Syntax Analysis Part (Fixed Version with Column info)
 * =========================================================
 */
void program(void);
void var_def(void);
void statement(void);
void expression(void);
void factor(void);

// Updated error function to show Column.
// Prints the message with the current line and column to standard output
// and terminates the program with exit status 1.
void error_exit(const char *msg)
{
    printf("Syntax Error (Line: %d, Column: %d): %s\n", line_count, column_count, msg);
    exit(1);
}

// Consume the current token when it equals `expected`; otherwise report
// err_msg through error_exit().
void check_symbol(int expected, const char *err_msg)
{
    if (current_token.sym != expected) {
        error_exit(err_msg);
    }
    getsym();
}

// Check if token is a binary operator. Returns 1 when it is, 0 otherwise.
int is_binary_operator(int sym)
{
    switch (sym) {
    case PLUS:
    case MINUS:
    case ASTERRISK:
    case SLASH:
    case LOGICAL_AND:
    case LOGICAL_OR:
    case GREATER:
    case GREATER_EQUAL:
    case LESS:
    case LESS_EQUAL:
    case RQUAL:
    case NOT_EQUAL:
        return 1;
    default:
        return 0;
    }
}

/*
 * Factor
 */
void factor() { // Unary operator if (current_token.sym == PLUS || current_token.sym == MINUS || current_token.sym == ESUAS) { getsym(); factor(); } // Parenthesis else if (current_token.sym == L_PAREN) { getsym(); expression(); check_symbol(R_PAREN, "Expected ')'"); } // Constants else if (current_token.sym == NUMERIC || current_token.sym == REV_TRUE || current_token.sym == REV_FALSE) { getsym(); } // Variable else if (current_token.sym == NAME) { check_symbol_exists(current_token.id); getsym(); } else { error_exit("Invalid expression start"); } } /* * Expression (Left Associative) */ void expression() { factor(); while (is_binary_operator(current_token.sym)) { getsym(); factor(); } } void statement() { // Assignment if (current_token.sym == NAME) { check_symbol_exists(current_token.id); getsym(); check_symbol(EQUAL, "Expected '=' for assignment"); expression(); check_symbol(SEMICOLON, "Expected ';'"); } // if else if (current_token.sym == REV_IF) { getsym(); check_symbol(L_PAREN, "Expected '(' after if"); expression(); check_symbol(R_PAREN, "Expected ')' after condition"); statement(); if (current_token.sym == REV_ELSE) { getsym(); statement(); } } // while else if (current_token.sym == REV_WHILE) { getsym(); check_symbol(L_PAREN, "Expected '(' after while"); expression(); check_symbol(R_PAREN, "Expected ')' after condition"); statement(); } // Block else if (current_token.sym == L_BRACKET) { getsym(); while (current_token.sym != R_BRACKET && current_token.sym != EOD) { statement(); } check_symbol(R_BRACKET, "Expected '}' at end of block"); } // scanf else if (current_token.sym == SCANF) { getsym(); check_symbol(L_PAREN, "Expected '(' after scanf"); check_symbol(PERCENT_D, "Expected \"%d\" in scanf"); check_symbol(COMMA, "Expected ','"); check_symbol(AMPERSAND, "Expected '&'"); if (current_token.sym == NAME) { check_symbol_exists(current_token.id); getsym(); } else { error_exit("Expected variable name"); } check_symbol(R_PAREN, "Expected ')'"); 
check_symbol(SEMICOLON, "Expected ';'"); } // printf else if (current_token.sym == PRINTF) { getsym(); check_symbol(L_PAREN, "Expected '(' after printf"); if (current_token.sym == PERCENT_D || current_token.sym == PERCENT_DN || current_token.sym == PERCENT_DT) { getsym(); } else { error_exit("Invalid format in printf"); } check_symbol(COMMA, "Expected ','"); if (current_token.sym == NAME) { check_symbol_exists(current_token.id); getsym(); } else { error_exit("Expected variable name in printf"); } check_symbol(R_PAREN, "Expected ')'"); check_symbol(SEMICOLON, "Expected ';'"); } // Empty statement else if (current_token.sym == SEMICOLON) { getsym(); } else { error_exit("Unknown statement"); } } void var_def() { if (current_token.sym == DEC_INT) { getsym(); while(1) { if (current_token.sym == NAME) { add_symbol(current_token.id); getsym(); } else { error_exit("Expected variable name in definition"); } if (current_token.sym == COMMA) { getsym(); continue; } else { break; } } check_symbol(SEMICOLON, "Expected ';' after variable definition"); } } void program() { check_symbol(DEC_INT, "Program must start with 'int'"); check_symbol(REV_MAIN, "Expected 'main' function"); check_symbol(L_PAREN, "Expected '(' after main"); // Handle 'void' in main(void) if (current_token.sym == NAME && strcmp(current_token.id, "void") == 0) { getsym(); } check_symbol(R_PAREN, "Expected ')'"); check_symbol(L_BRACKET, "Expected '{' at start of body"); if (current_token.sym == DEC_INT) { var_def(); } while (current_token.sym != R_BRACKET && current_token.sym != EOD) { statement(); } check_symbol(R_BRACKET, "Expected '}' at end of program"); } int main() { getsym(); if (current_token.sym != EOD) { program(); } printf("No error.\n"); return 0; }