/*
* Assignment: Tiny C Parser (Fixed Version)
* Student ID: 24412014
* Name: Tomoya Kitamura
* Date: 2026/01/30
*/
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* * =========================================================
* Content from distributed define_token.h
* =========================================================
*/
// --- Control tokens ---
#define START_TOKEN 0 // initial value of the lexer's token state
#define EOD 1 // end of data: getsym() returns this when no input is left
#define UNKNOWN 2 // symbol that identify_symbol() could not classify
// --- Tokens that carry a value ---
#define NAME 10 // identifier that is not a reserved word
#define NUMERIC 11 // run of decimal digits
#define BOOLEAN_VAL 12 // NOTE(review): not referenced in the visible code
// --- Reserved words (spellings are listed in the rev_word[] table) ---
#define REV_TRUE 21 // true
#define REV_FALSE 22 // false
#define REV_PERCENT_D 23 // NOTE(review): not referenced in the visible code
#define REV_PERCENT_T 24 // NOTE(review): not referenced in the visible code
#define DEC_INT 31 // int
#define REV_MAIN 32 // main
#define REV_IF 33 // if
#define REV_ELSE 34 // else
#define REV_WHILE 35 // while
#define SCANF 36 // scanf
#define PRINTF 37 // printf
// --- Punctuation and arithmetic symbols ---
#define L_BRACKET 51 // {
#define R_BRACKET 52 // }
#define COMMA 53 // ,
#define SEMICOLON 54 // ;
#define EQUAL 55 // =
#define PLUS 56 // +
#define MINUS 57 // -
#define ESUAS 58 // !
#define ASTERRISK 59 // *
#define SLASH 60 // /
#define L_PAREN 61 // (
#define R_PAREN 62 // )
// --- Logical and relational operators ---
#define LOGICAL_AND 81 // &&
#define LOGICAL_OR 82 // ||
#define GREATER 83 // >
#define GREATER_EQUAL 84 // >=
#define LESS 85 // <
#define LESS_EQUAL 86 // <=
#define RQUAL 87 // == (the macro name is spelled RQUAL throughout the file)
#define NOT_EQUAL 88 // !=
#define AMPERSAND 89 // single &
// --- Quoted format strings; value is PERCENT_D + index into formatter[] ---
#define PERCENT_D 101 // formatter[0]
#define PERCENT_DN 102 // formatter[1]
#define PERCENT_DT 103 // formatter[2]
#define BLANK ' ' // NOTE(review): the lexer below uses BLANK_CHARACTER instead
/*
 * One entry of the reserved-word table: the spelling of a reserved word
 * and the token number the lexer reports when it sees that spelling.
 */
struct rev_word_typ {
    int token_num;      /* token number reported for this word */
    char rev_word[16];  /* NUL-terminated spelling of the word */
};

/*
 * Reserved-word table searched by identify_rev_word().
 * The search loop is bounded by REV_WORD_LENGTH, so the number of entries
 * here has to stay equal to that constant.
 */
struct rev_word_typ rev_word[] = {
    { .token_num = REV_TRUE,  .rev_word = "true"   },
    { .token_num = REV_FALSE, .rev_word = "false"  },
    { .token_num = DEC_INT,   .rev_word = "int"    },
    { .token_num = REV_MAIN,  .rev_word = "main"   },
    { .token_num = REV_IF,    .rev_word = "if"     },
    { .token_num = REV_ELSE,  .rev_word = "else"   },
    { .token_num = REV_WHILE, .rev_word = "while"  },
    { .token_num = SCANF,     .rev_word = "scanf"  },
    { .token_num = PRINTF,    .rev_word = "printf" }
};
/*
 * Format strings the lexer accepts between double quotes. The backslash is
 * stored literally because the scanned source text contains the two
 * characters backslash + letter. Entry i maps to token PERCENT_D + i.
 */
char *formatter[] = {
    "%d",
    "%d\\n",
    "%d\\t"
};
/*
 * Token object filled in by set_token_obj().
 * Only the field that matches the token kind is updated on each call, so
 * `id` and `num` may hold values left over from an earlier token.
 */
struct token_obj_typ {
    char id[256];   /* identifier text, written for NAME tokens */
    int num;        /* numeric value, written for NUMERIC tokens */
    int sym;        /* token number, written for every token */
};

/* The token most recently produced by the lexer; read by the parser. */
struct token_obj_typ current_token;

/* Count of input lines read so far; printed in error messages. */
int line_count = 0;
/* * =========================================================
* Symbol Table Implementation (Fix for "Fatal Error")
* =========================================================
*/
#define MAX_VARS    100   /* maximum number of declared variables */
#define MAX_VAR_LEN 16    /* bytes per stored name, including the NUL */

/* Names of the variables declared so far, in declaration order. */
char symbol_table[MAX_VARS][MAX_VAR_LEN];

/* Number of rows of symbol_table currently in use. */
int symbol_count = 0;

/* Defined in the syntax-analysis part; declared here so the table code can call it. */
void error_exit(char *msg);
/*
 * Register a newly declared variable.
 *
 * name: NUL-terminated variable name.
 *
 * Reports "Duplicate variable definition" through error_exit() when the name
 * is already stored, and "Too many variables" when the table is full.
 * At most MAX_VAR_LEN - 1 characters of the name are stored.
 */
void add_symbol(char *name) {
    char *slot;
    int idx = 0;

    /* Reject a second declaration of the same name. */
    while (idx < symbol_count) {
        if (!strcmp(symbol_table[idx], name)) {
            error_exit("Duplicate variable definition");
        }
        idx++;
    }

    if (symbol_count >= MAX_VARS) {
        error_exit("Too many variables");
    }

    /* Bounded copy followed by an explicit terminator. */
    slot = symbol_table[symbol_count];
    strncpy(slot, name, MAX_VAR_LEN - 1);
    slot[MAX_VAR_LEN - 1] = '\0';
    symbol_count++;
}
/*
 * Verify that a variable has been declared.
 *
 * name: NUL-terminated variable name to look up in symbol_table.
 *
 * Returns normally when the name is found; otherwise reports
 * "Undefined variable used" through error_exit().
 */
void check_symbol_exists(char *name) {
    int found = 0;
    int idx;

    for (idx = 0; idx < symbol_count && !found; idx++) {
        found = (strcmp(symbol_table[idx], name) == 0);
    }

    if (!found) {
        error_exit("Undefined variable used");
    }
}
/* * =========================================================
* Lexical Analysis Part (getsym)
* =========================================================
*/
#define ONE_LINE_LENGTH 256     /* size in bytes of the lexer's line buffers */
#define REV_WORD_LENGTH 9       /* number of entries searched in rev_word[] */
#define EOL             (-1)    /* get_line() result when no input was read */
#define NULL_CHARACTER  '\0'    /* string terminator */
#define BLANK_CHARACTER ' '     /* token separator */
#define SLASH_CHARACTER '/'     /* two in a row start a line comment */
/*
 * Look a word up in the reserved-word table.
 *
 * p: NUL-terminated word to classify.
 *
 * Returns the token number of the matching rev_word[] entry, or 0 when the
 * word is not one of the first REV_WORD_LENGTH entries.
 */
int identify_rev_word(char *p) {
    int idx;

    for (idx = 0; idx < REV_WORD_LENGTH; idx++) {
        if (strcmp(p, rev_word[idx].rev_word) == 0) {
            return rev_word[idx].token_num;
        }
    }
    return 0;
}
/*
 * Classify the punctuation, operator or quoted format string at **ptr.
 *
 * ptr: address of the scan pointer. On return the pointer has been advanced
 *      past the characters that were consumed.
 *
 * Returns the token number of the symbol, or UNKNOWN when it is not
 * recognised. Notes on the less obvious cases:
 *   - a lone '|' is UNKNOWN (only "||" is an operator);
 *   - a quoted string is a token only when its whole content equals one of
 *     the formatter[] entries, so "" or "%" no longer pass as "%d";
 *   - an unterminated quoted string is UNKNOWN and the pointer is left on the
 *     string terminator instead of being moved beyond it;
 *   - any other unrecognised character is consumed, so a caller that keeps
 *     scanning makes progress.
 */
int identify_symbol(char **ptr) {
    enum { FORMAT_COUNT = 3 };  /* entries of formatter[] mapped to PERCENT_D.. */
    char *bptr;
    size_t len;
    int sm = UNKNOWN;
    int i;

    switch (**ptr) {
    case '{': sm = L_BRACKET; (*ptr)++; break;
    case '}': sm = R_BRACKET; (*ptr)++; break;
    case '(': sm = L_PAREN; (*ptr)++; break;
    case ')': sm = R_PAREN; (*ptr)++; break;
    case ',': sm = COMMA; (*ptr)++; break;
    case ';': sm = SEMICOLON; (*ptr)++; break;
    case '+': sm = PLUS; (*ptr)++; break;
    case '-': sm = MINUS; (*ptr)++; break;
    case '*': sm = ASTERRISK; (*ptr)++; break;
    case '/': sm = SLASH; (*ptr)++; break;
    case '=':
        (*ptr)++;
        if (**ptr == '=') { sm = RQUAL; (*ptr)++; } else { sm = EQUAL; }
        break;
    case '&':
        (*ptr)++;
        if (**ptr == '&') { sm = LOGICAL_AND; (*ptr)++; } else { sm = AMPERSAND; }
        break;
    case '|':
        (*ptr)++;
        if (**ptr == '|') { sm = LOGICAL_OR; (*ptr)++; } else { sm = UNKNOWN; }
        break;
    case '>':
        (*ptr)++;
        if (**ptr == '=') { sm = GREATER_EQUAL; (*ptr)++; } else { sm = GREATER; }
        break;
    case '<':
        (*ptr)++;
        if (**ptr == '=') { sm = LESS_EQUAL; (*ptr)++; } else { sm = LESS; }
        break;
    case '!':
        (*ptr)++;
        if (**ptr == '=') { sm = NOT_EQUAL; (*ptr)++; } else { sm = ESUAS; }
        break;
    case '"':
        (*ptr)++;
        bptr = *ptr;
        while (**ptr != '"' && **ptr != NULL_CHARACTER) {
            (*ptr)++;
        }
        if (**ptr == NULL_CHARACTER) {
            /* No closing quote: stay on the terminator so the caller never
             * reads past the end of the line buffer. */
            sm = UNKNOWN;
            break;
        }
        /* Require an exact match: same length and same characters. */
        len = (size_t)(*ptr - bptr);
        for (i = 0; i < FORMAT_COUNT; i++) {
            if (strlen(formatter[i]) == len && strncmp(bptr, formatter[i], len) == 0) {
                break;
            }
        }
        sm = (i == FORMAT_COUNT) ? UNKNOWN : PERCENT_D + i;
        (*ptr)++;   /* step over the closing quote */
        break;
    case NULL_CHARACTER:
        /* End of line: nothing to consume. */
        break;
    default:
        /* Unrecognised character: consume it and report UNKNOWN. */
        (*ptr)++;
        break;
    }
    return sm;
}
/*
 * Store a scanned token in current_token.
 *
 * buffer: token text; read only for NAME and NUMERIC tokens.
 * ssym:   token number, always stored in current_token.sym.
 *
 * NAME copies the text into current_token.id (bounded by the size of that
 * field). NUMERIC converts the text with strtol(); a literal too large for
 * an int is clamped to INT_MAX rather than invoking the undefined behaviour
 * atoi() has on out-of-range input. Other token kinds leave id and num as
 * they were.
 */
void set_token_obj(char *buffer, int ssym) {
    long value;

    current_token.sym = ssym;

    switch (ssym) {
    case NAME:
        snprintf(current_token.id, sizeof current_token.id, "%s", buffer);
        break;
    case NUMERIC:
        /* strtol() returns LONG_MAX/LONG_MIN on overflow, so clamping the
         * result to the int range covers both the overflow and the
         * long-wider-than-int cases. */
        value = strtol(buffer, NULL, 10);
        if (value > INT_MAX) {
            value = INT_MAX;
        } else if (value < INT_MIN) {
            value = INT_MIN;
        }
        current_token.num = (int)value;
        break;
    default:
        break;
    }
}
/*
 * Read one line from standard input into buffer.
 *
 * buffer: destination of at least ONE_LINE_LENGTH bytes.
 *
 * The newline is replaced by a single blank and the result is always
 * NUL-terminated, including a final line that ends at EOF without a newline.
 * Tabs, carriage returns and other whitespace are stored as blanks because
 * the scanner only treats BLANK_CHARACTER as a separator.
 * line_count is incremented for every line returned.
 *
 * Returns the number of characters stored (trailing blank included), or EOL
 * when the input is exhausted. A line that does not fit in the buffer is
 * reported as an error and terminates the program.
 */
int get_line(char *buffer) {
    int ch;
    int counter = 0;

    while ((ch = getchar()) != EOF && ch != '\n') {
        /* Keep two bytes free for the trailing blank and the terminator. */
        if (counter >= ONE_LINE_LENGTH - 2) {
            printf("Syntax Error (Line: %d): Line too long (Max %d characters)\n",
                   line_count + 1, ONE_LINE_LENGTH - 2);
            exit(1);
        }
        buffer[counter++] = isspace(ch) ? BLANK_CHARACTER : (char)ch;
    }

    if (ch == EOF && counter == 0) {
        return EOL;
    }

    buffer[counter++] = BLANK_CHARACTER;
    buffer[counter] = NULL_CHARACTER;
    line_count++;
    return counter;
}

/*
 * Scan the next token from standard input (getsym).
 *
 * The token is stored in current_token through set_token_obj() and its token
 * number is returned. At end of input the token EOD is stored and returned;
 * storing it matters because the parser tests current_token.sym, not the
 * return value, to detect end of input.
 *
 * Line state is kept in static variables: `ptr == one_line` means the current
 * line is used up and a new one must be read. Text from "//" to the end of
 * the line is skipped as a comment.
 */
int getsym() {
    static int ssym = START_TOKEN;
    static char one_line[ONE_LINE_LENGTH];
    static char *ptr = one_line;
    char buffer[ONE_LINE_LENGTH];
    char *buffer_p = buffer;
    int res = 0;

    buffer[0] = NULL_CHARACTER;

    /* Refill the line buffer until a line with a token on it is found. */
    while (ptr == one_line && res == 0) {
        res = get_line(one_line);
        if (res == EOL) {
            ptr = one_line;
            ssym = EOD;
            set_token_obj(buffer, ssym);
            return ssym;
        }
        /* Skip leading blanks; cut the line at a leading comment. */
        while (*ptr != NULL_CHARACTER) {
            if (*ptr == BLANK_CHARACTER) {
                ptr++;
                continue;
            }
            if (*ptr == SLASH_CHARACTER && *(ptr + 1) == SLASH_CHARACTER) {
                *ptr = NULL_CHARACTER;
            } else {
                break;
            }
        }
        if (*ptr == NULL_CHARACTER) {
            /* Blank or comment-only line: read another one. */
            ptr = one_line;
            res = 0;
        }
    }

    /* <ctype.h> functions need an unsigned char value; a plain char may be
     * negative for bytes outside the ASCII range. */
    if (isalpha((unsigned char)*ptr)) {
        /* Letters and digits: reserved word or variable name. */
        while (isalnum((unsigned char)*ptr)) {
            *buffer_p++ = *ptr++;
        }
        *buffer_p = NULL_CHARACTER;
        ssym = identify_rev_word(buffer);
        if (ssym == 0) {
            ssym = NAME;
            if (strlen(buffer) > 15) {
                printf("Syntax Error (Line: %d): Variable name too long (Max 15)\n", line_count);
                exit(1);
            }
        }
        set_token_obj(buffer, ssym);
    } else if (isdigit((unsigned char)*ptr)) {
        /* Run of digits: numeric literal. */
        while (isdigit((unsigned char)*ptr)) {
            *buffer_p++ = *ptr++;
        }
        *buffer_p = NULL_CHARACTER;
        ssym = NUMERIC;
        set_token_obj(buffer, ssym);
    } else {
        /* Anything else: punctuation, operator or quoted format string. */
        ssym = identify_symbol(&ptr);
        set_token_obj(buffer, ssym);
    }

    /* Skip trailing blanks; mark the line as used up at its end or at a
     * trailing comment so the next call reads a fresh line. */
    while (*ptr == BLANK_CHARACTER) {
        ptr++;
    }
    if (*ptr == NULL_CHARACTER ||
        (*ptr == SLASH_CHARACTER && *(ptr + 1) == SLASH_CHARACTER)) {
        ptr = one_line;
    }
    return ssym;
}
/* * =========================================================
* Syntax Analysis Part (Fixed Version)
* =========================================================
*/
/*
 * Forward declarations for the mutually recursive parser routines.
 * Written as full prototypes, (void), so the compiler rejects a call that
 * passes arguments; an empty () declaration performs no such check.
 */
void program(void);
void var_def(void);
void statement(void);
void expression(void);
void factor(void);
void error_exit(char *msg) {
printf("Syntax Error (Line: %d): %s\n", line_count, msg);
// Debug: printf("Current Token: %d\n", current_token.sym);
exit(1);
}
/*
 * Require the current token to be `expected` and move on to the next one.
 *
 * expected: token number that must be current.
 * err_msg:  message passed to error_exit() when a different token is found.
 */
void check_symbol(int expected, char *err_msg) {
    if (current_token.sym != expected) {
        error_exit(err_msg);
        return;
    }
    getsym();
}
/*
 * Tell whether a token number denotes a binary operator.
 *
 * sym: token number to test.
 *
 * Returns 1 for the arithmetic, logical and relational operator tokens
 * listed below, 0 for everything else.
 */
int is_binary_operator(int sym) {
    switch (sym) {
    case PLUS:
    case MINUS:
    case ASTERRISK:
    case SLASH:
    case LOGICAL_AND:
    case LOGICAL_OR:
    case GREATER:
    case GREATER_EQUAL:
    case LESS:
    case LESS_EQUAL:
    case RQUAL:
    case NOT_EQUAL:
        return 1;
    default:
        return 0;
    }
}
/*
 * Factor: the operand of an expression.
 *
 *   factor := ('+' | '-' | '!') factor
 *           | '(' expression ')'
 *           | number | true | false
 *           | variable
 *
 * A variable must already be present in the symbol table. Any other token
 * is reported as "Invalid expression start".
 */
void factor() {
    switch (current_token.sym) {
    case PLUS:
    case MINUS:
    case ESUAS:
        /* Unary operator; recursion allows chains such as "- - 5". */
        getsym();
        factor();
        break;
    case L_PAREN:
        getsym();
        expression();
        check_symbol(R_PAREN, "Expected ')'");
        break;
    case NUMERIC:
    case REV_TRUE:
    case REV_FALSE:
        getsym();
        break;
    case NAME:
        check_symbol_exists(current_token.id);
        getsym();
        break;
    default:
        error_exit("Invalid expression start");
        break;
    }
}
/*
 * Expression: a factor followed by any number of "operator factor" pairs.
 *
 *   expression := factor { binary-operator factor }
 *
 * The pairs are consumed in a loop, left to right; all binary operators are
 * accepted at the same level.
 */
void expression() {
    for (;;) {
        factor();
        if (!is_binary_operator(current_token.sym)) {
            return;
        }
        getsym();   /* consume the operator, then parse its right operand */
    }
}
/*
 * Statement: parse one statement, selected by the current token.
 *
 *   variable '=' expression ';'
 *   if '(' expression ')' statement [ else statement ]
 *   while '(' expression ')' statement
 *   '{' { statement } '}'
 *   scanf '(' "%d" ',' '&' variable ')' ';'
 *   printf '(' format ',' variable ')' ';'
 *   ';'
 *
 * Every variable mentioned must already be declared. Any other leading
 * token is reported as "Unknown statement".
 */
void statement() {
    switch (current_token.sym) {
    case NAME:
        /* Assignment. */
        check_symbol_exists(current_token.id);
        getsym();
        check_symbol(EQUAL, "Expected '=' for assignment");
        expression();
        check_symbol(SEMICOLON, "Expected ';'");
        break;

    case REV_IF:
        getsym();
        check_symbol(L_PAREN, "Expected '(' after if");
        expression();
        check_symbol(R_PAREN, "Expected ')' after condition");
        statement();
        /* Optional else branch. */
        if (current_token.sym == REV_ELSE) {
            getsym();
            statement();
        }
        break;

    case REV_WHILE:
        getsym();
        check_symbol(L_PAREN, "Expected '(' after while");
        expression();
        check_symbol(R_PAREN, "Expected ')' after condition");
        statement();
        break;

    case L_BRACKET:
        /* Block: statements up to the closing brace or end of input. */
        getsym();
        while (!(current_token.sym == R_BRACKET || current_token.sym == EOD)) {
            statement();
        }
        check_symbol(R_BRACKET, "Expected '}' at end of block");
        break;

    case SCANF:
        getsym();
        check_symbol(L_PAREN, "Expected '(' after scanf");
        check_symbol(PERCENT_D, "Expected \"%d\" in scanf");
        check_symbol(COMMA, "Expected ','");
        check_symbol(AMPERSAND, "Expected '&'");
        if (current_token.sym != NAME) {
            error_exit("Expected variable name");
        } else {
            check_symbol_exists(current_token.id);
            getsym();
        }
        check_symbol(R_PAREN, "Expected ')'");
        check_symbol(SEMICOLON, "Expected ';'");
        break;

    case PRINTF:
        getsym();
        check_symbol(L_PAREN, "Expected '(' after printf");
        switch (current_token.sym) {
        case PERCENT_D:
        case PERCENT_DN:
        case PERCENT_DT:
            getsym();
            break;
        default:
            error_exit("Invalid format in printf");
            break;
        }
        check_symbol(COMMA, "Expected ','");
        if (current_token.sym != NAME) {
            error_exit("Expected variable name in printf");
        } else {
            check_symbol_exists(current_token.id);
            getsym();
        }
        check_symbol(R_PAREN, "Expected ')'");
        check_symbol(SEMICOLON, "Expected ';'");
        break;

    case SEMICOLON:
        /* Empty statement. */
        getsym();
        break;

    default:
        error_exit("Unknown statement");
        break;
    }
}
/*
 * Variable definition: int name { ',' name } ';'
 *
 * Does nothing when the current token is not `int`. Each name is entered
 * into the symbol table with add_symbol(), which rejects duplicates.
 */
void var_def() {
    if (current_token.sym != DEC_INT) {
        return;
    }

    do {
        /* First pass consumes `int`; later passes consume the comma. */
        getsym();
        if (current_token.sym != NAME) {
            error_exit("Expected variable name in definition");
        } else {
            add_symbol(current_token.id);
            getsym();
        }
    } while (current_token.sym == COMMA);

    check_symbol(SEMICOLON, "Expected ';' after variable definition");
}
/*
 * Program: int main '(' [ void ] ')' '{' [ var_def ] { statement } '}'
 *
 * `void` is not a reserved word in this lexer, so it arrives as a NAME token
 * whose text is "void" and is accepted on that basis.
 */
void program() {
    check_symbol(DEC_INT, "Program must start with 'int'");
    check_symbol(REV_MAIN, "Expected 'main' function");
    check_symbol(L_PAREN, "Expected '(' after main");

    /* Optional parameter list consisting of the single word void. */
    if (current_token.sym == NAME) {
        if (!strcmp(current_token.id, "void")) {
            getsym();
        }
    }

    check_symbol(R_PAREN, "Expected ')'");
    check_symbol(L_BRACKET, "Expected '{' at start of body");

    /* One optional variable definition at the top of the body. */
    if (current_token.sym == DEC_INT) {
        var_def();
    }

    /* Statements up to the closing brace or end of input. */
    while (!(current_token.sym == R_BRACKET || current_token.sym == EOD)) {
        statement();
    }

    check_symbol(R_BRACKET, "Expected '}' at end of program");
}
/*
 * Entry point: read the first token, parse the program unless the input is
 * already at its end, and print "No error." when parsing finished without
 * error_exit() being called.
 *
 * Returns 0. Syntax errors terminate the process from inside the parser.
 */
int main(void) {
    getsym();
    if (current_token.sym != EOD) {
        program();
    }
    printf("No error.\n");
    return 0;
}