/*
* Assignment: Tiny C Parser
* Student ID: 24412014
* Name: Tomoya Kitamura
* Date: 2026/01/30
*/
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* * =========================================================
* Content from distributed define_token.h
* =========================================================
*/
// Control tokens. START_TOKEN is the initial value of getsym()'s internal
// state, EOD is what getsym() returns when no more input can be read, and
// UNKNOWN is what identify_symbol() yields for text it does not recognize.
#define START_TOKEN 0
#define EOD 1
#define UNKNOWN 2
// Token classes assigned by getsym() to identifiers and integer literals.
// BOOLEAN_VAL is not referenced by the code in this file.
#define NAME 10
#define NUMERIC 11
#define BOOLEAN_VAL 12
// Reserved-word tokens; the spellings are listed in the rev_word table.
// REV_PERCENT_D and REV_PERCENT_T are not referenced by the code in this file.
#define REV_TRUE 21
#define REV_FALSE 22
#define REV_PERCENT_D 23
#define REV_PERCENT_T 24
#define DEC_INT 31
#define REV_MAIN 32
#define REV_IF 33
#define REV_ELSE 34
#define REV_WHILE 35
#define SCANF 36
#define PRINTF 37
// Punctuation and arithmetic operators.
// expression() treats PLUS..SLASH as one contiguous numeric range of binary
// operators (skipping ESUAS), so these values must not be renumbered.
// The names ESUAS and ASTERRISK are used as spelled here by the lexer and
// parser, so the spellings are kept.
#define L_BRACKET 51 // {
#define R_BRACKET 52 // }
#define COMMA 53 // ,
#define SEMICOLON 54 // ;
#define EQUAL 55 // = (assignment)
#define PLUS 56 // +
#define MINUS 57 // -
#define ESUAS 58 // ! (logical not)
#define ASTERRISK 59 // *
#define SLASH 60 // /
#define L_PAREN 61 // (
#define R_PAREN 62 // )
// Logical and relational operators.
// expression() treats LOGICAL_AND..NOT_EQUAL as one contiguous numeric range
// of binary operators; AMPERSAND lies outside that range.
#define LOGICAL_AND 81 // &&
#define LOGICAL_OR 82 // ||
#define GREATER 83 // >
#define GREATER_EQUAL 84 // >=
#define LESS 85 // <
#define LESS_EQUAL 86 // <=
#define RQUAL 87 // == (name used as spelled here by identify_symbol())
#define NOT_EQUAL 88 // !=
#define AMPERSAND 89 // & (single)
// Format-string tokens. identify_symbol() computes these as
// PERCENT_D + (index into formatter[]), so they must stay consecutive and in
// the same order as that array.
#define PERCENT_D 101
#define PERCENT_DN 102
#define PERCENT_DT 103
// Not referenced by the code in this file; the lexer uses BLANK_CHARACTER.
#define BLANK ' '
// One entry of the reserved-word table: a keyword spelling paired with the
// token code that identify_rev_word() returns for it.
struct rev_word_typ {
int token_num; // token code for this keyword
char rev_word[16]; // keyword text, compared with strcmp()
};
// Reserved-word table, searched linearly by identify_rev_word().
// That search uses REV_WORD_LENGTH as its loop bound, so the number of
// entries here has to match that constant.
struct rev_word_typ rev_word[] = {
{REV_TRUE, "true"},
{REV_FALSE, "false"},
{DEC_INT, "int"},
{REV_MAIN, "main"},
{REV_IF, "if"},
{REV_ELSE, "else"},
{REV_WHILE, "while"},
{SCANF, "scanf"},
{PRINTF, "printf"}
};
// Format-string bodies that identify_symbol() compares against the text
// found between double quotes in the input. "\\n" and "\\t" are a backslash
// followed by a letter (two characters), not a newline or tab.
// Entry i corresponds to token PERCENT_D + i.
char *formatter[] = {"%d", "%d\\n", "%d\\t"};
// Token object filled in by set_token_obj().
struct token_obj_typ {
char id[256]; // identifier text; written only when the token is NAME
int num; // integer value; written only when the token is NUMERIC
int sym; // token code; written for every token passed to set_token_obj()
};
// Current token: written by the lexer through set_token_obj(), read by the
// parsing routines.
struct token_obj_typ current_token;
// Variables for error reporting, printed by error_exit()
int line_count = 0; // Line number; incremented by get_line() for each newline read
int token_col = 0; // Column number; set by getsym() to the 1-based offset of the token in its line
/* * =========================================================
* Lexical Analysis Part (getsym)
* Based on distributed getsym_p.c
* =========================================================
*/
#define ONE_LINE_LENGTH 256 // size of the line buffer and the token buffer in getsym()
#define REV_WORD_LENGTH 9 // loop bound for the rev_word[] search; must equal its entry count
#define EOL -1 // get_line() result when no character was read
#define NULL_CHARACTER '\0'
#define BLANK_CHARACTER ' '
#define SLASH_CHARACTER '/' // two in a row make getsym() ignore the rest of the line
// Look a word up in the reserved-word table.
//   p: NUL-terminated word to test.
// Returns the token code of the first table entry whose spelling equals p,
// or 0 when none of the first REV_WORD_LENGTH entries match.
int identify_rev_word(char *p) {
    int idx;

    for (idx = 0; idx < REV_WORD_LENGTH; idx++) {
        if (strcmp(p, rev_word[idx].rev_word) == 0) {
            return rev_word[idx].token_num;
        }
    }
    return 0;
}
// Function to identify symbols
//   ptr: address of the caller's cursor into the current line. On return the
//        cursor has been moved past whatever was consumed.
// Returns the token code of the operator, punctuation mark or format-string
// literal at the cursor, or UNKNOWN when the text is not recognized.
//
// Cursor rules:
//   - a recognized symbol is consumed in full (one or two characters, or a
//     whole quoted literal including both quotes);
//   - an unrecognized character is consumed so that repeated calls always make
//     progress;
//   - the cursor is never moved past the line's NUL terminator, including for
//     an unterminated string literal.
int identify_symbol(char **ptr) {
    enum { FORMAT_COUNT = 3 }; // entries of formatter[] mapped to PERCENT_D..PERCENT_DT
    char cc = **ptr;
    char *bptr;
    size_t len;
    int sm = UNKNOWN;
    int i;

    switch (cc) {
    case '{': sm = L_BRACKET; (*ptr)++; break;
    case '}': sm = R_BRACKET; (*ptr)++; break;
    case '(': sm = L_PAREN; (*ptr)++; break;
    case ')': sm = R_PAREN; (*ptr)++; break;
    case ',': sm = COMMA; (*ptr)++; break;
    case ';': sm = SEMICOLON; (*ptr)++; break;
    case '=':
        (*ptr)++;
        if (**ptr == '=') { sm = RQUAL; (*ptr)++; } else { sm = EQUAL; }
        break;
    case '&':
        (*ptr)++;
        if (**ptr == '&') { sm = LOGICAL_AND; (*ptr)++; } else { sm = AMPERSAND; }
        break;
    case '|':
        // A single '|' is not an operator of the language.
        (*ptr)++;
        if (**ptr == '|') { sm = LOGICAL_OR; (*ptr)++; } else { sm = UNKNOWN; }
        break;
    case '>':
        (*ptr)++;
        if (**ptr == '=') { sm = GREATER_EQUAL; (*ptr)++; } else { sm = GREATER; }
        break;
    case '<':
        (*ptr)++;
        if (**ptr == '=') { sm = LESS_EQUAL; (*ptr)++; } else { sm = LESS; }
        break;
    case '+': sm = PLUS; (*ptr)++; break;
    case '-': sm = MINUS; (*ptr)++; break;
    case '!':
        (*ptr)++;
        if (**ptr == '=') { sm = NOT_EQUAL; (*ptr)++; } else { sm = ESUAS; }
        break;
    case '*': sm = ASTERRISK; (*ptr)++; break;
    case '/': sm = SLASH; (*ptr)++; break;
    case '"':
        (*ptr)++;
        bptr = *ptr;
        while (**ptr != '"' && **ptr != NULL_CHARACTER) (*ptr)++;
        if (**ptr == NULL_CHARACTER) {
            // Unterminated literal: leave the cursor on the terminator rather
            // than stepping beyond the end of the line.
            sm = UNKNOWN;
            break;
        }
        len = (size_t)(*ptr - bptr);
        for (i = 0; i < FORMAT_COUNT; i++) {
            // Require equal lengths: comparing only the first len characters
            // would accept "" or "%" as a match for "%d".
            if (strlen(formatter[i]) == len && strncmp(bptr, formatter[i], len) == 0) break;
        }
        sm = (i == FORMAT_COUNT) ? UNKNOWN : PERCENT_D + i;
        (*ptr)++; // consume the closing quote
        break;
    case NULL_CHARACTER:
        // End of line: nothing to consume.
        break;
    default:
        // Unrecognized character: consume it so the lexer cannot stall on it.
        (*ptr)++;
        break;
    }
    return sm;
}
// Set values to token object
//   buffer: NUL-terminated token text. It is read only when ssym is NAME or
//           NUMERIC.
//   ssym:   token code to store in current_token.sym.
// For NAME the text is copied into current_token.id; for NUMERIC the decimal
// value is stored in current_token.num. Other fields are left as they were.
void set_token_obj(char *buffer, int ssym) {
    long value;

    current_token.sym = ssym;
    switch (ssym) {
    case NAME:
        // Bounded copy: never writes past current_token.id and always
        // NUL-terminates, truncating an over-long name instead of overflowing.
        snprintf(current_token.id, sizeof current_token.id, "%s", buffer);
        break;
    case NUMERIC:
        // strtol has defined behavior for out-of-range literals (atoi does
        // not); values that do not fit in int are clamped.
        value = strtol(buffer, NULL, 10);
        if (value > INT_MAX) value = INT_MAX;
        if (value < INT_MIN) value = INT_MIN;
        current_token.num = (int)value;
        break;
    default:
        break;
    }
}
// Function to read one line
int get_line(char *buffer) {
int ch='\0';
char *ptr = buffer;
int counter=0;
while ((ch = getchar()) != EOF) {
if (ch == '\n' || ch == 0x0a) {
*ptr = ' ';
ptr++;
counter++;
*ptr = NULL_CHARACTER;
// Count line number
line_count++;
break;
} else {
*ptr = ch;
ptr++;
counter++;
}
}
if (counter) return counter; else return EOL;
}
// Main function to get token (getsym)
// Reads the next token from standard input, stores it in current_token via
// set_token_obj(), and returns its token code. At end of input it stores and
// returns EOD, so callers that test current_token.sym against EOD see it.
//
// State kept between calls:
//   one_line: the line currently being scanned;
//   ptr:      scan position in one_line. ptr == one_line on entry means the
//             line has been used up and a new one must be read.
// token_col is set to the 1-based offset of the token within one_line.
int getsym() {
    static int ssym = START_TOKEN;
    static char one_line[ONE_LINE_LENGTH];
    static char *ptr = one_line;
    char buffer[ONE_LINE_LENGTH], *buffer_p = buffer;
    int res = 0;

    buffer[0] = NULL_CHARACTER;

    // If buffer is empty, read new line
    while (ptr == one_line && res == 0) {
        res = get_line(one_line);
        if (res == EOL) {
            ptr = one_line;
            // Record end of input in current_token as well as returning it;
            // the parser only ever looks at current_token.sym.
            ssym = EOD;
            set_token_obj(buffer, ssym);
            return ssym; // End of file
        }
        // Skip comments and blanks
        while (*ptr != NULL_CHARACTER) {
            if (*ptr == BLANK_CHARACTER) {
                ptr++;
                continue;
            }
            if (*ptr == SLASH_CHARACTER && *(ptr + 1) == SLASH_CHARACTER) {
                *ptr = NULL_CHARACTER; // cut the line at the start of the comment
            } else {
                break;
            }
        }
        // Nothing but blanks or a comment: fetch another line.
        if (*ptr == NULL_CHARACTER) {
            ptr = one_line;
            res = 0;
        }
    }

    // Calculate column number: pointer position - start address + 1
    token_col = (int)(ptr - one_line) + 1;

    // The <ctype.h> classifiers require a value representable as unsigned
    // char, hence the casts: plain char may be negative for non-ASCII bytes.
    if (isalpha((unsigned char)*ptr)) {
        // Alpha: variable or reserved word
        while (isalnum((unsigned char)*ptr)) {
            *buffer_p++ = *ptr++;
        }
        *buffer_p = NULL_CHARACTER;
        ssym = identify_rev_word(buffer);
        if (ssym == 0) ssym = NAME;
        set_token_obj(buffer, ssym);
    } else if (isdigit((unsigned char)*ptr)) {
        // Digit: number
        while (isdigit((unsigned char)*ptr)) {
            *buffer_p++ = *ptr++;
        }
        *buffer_p = NULL_CHARACTER;
        ssym = NUMERIC;
        set_token_obj(buffer, ssym);
    } else {
        // Others: symbols
        ssym = identify_symbol(&ptr);
        set_token_obj(buffer, ssym);
    }

    // Skip blanks after the token; if only a comment or nothing is left, mark
    // the line as used up so the next call reads a new one.
    while (*ptr == BLANK_CHARACTER) ptr++;
    if (*ptr == NULL_CHARACTER ||
        (*ptr == SLASH_CHARACTER && *(ptr + 1) == SLASH_CHARACTER)) {
        ptr = one_line;
    }
    return ssym;
}
/* * =========================================================
* Syntax Analysis Part
* Implemented based on Tiny C specification
* =========================================================
*/
// Prototype declarations
// Declared up front so the parsing routines can be called before their
// definitions appear further down.
void program(); // whole program; called from main()
void var_def(); // variable definition; called from program()
void statement(); // one statement; calls itself for nested statements
void expression(); // one expression; calls itself for sub-expressions
// Report a syntax error and stop.
//   msg: description of the error.
// Writes one line to standard output containing the current line_count,
// token_col and msg, then terminates the program with exit status 1.
void error_exit(char *msg) {
    const int line = line_count;
    const int column = token_col;

    fprintf(stdout, "Syntax Error (Line: %d, Column: %d): %s\n", line, column, msg);
    exit(1);
}
// Require a specific token at the current position.
//   expected: token code that current_token.sym must have.
//   err_msg:  message handed to error_exit() when it does not.
// On a match the lexer is advanced to the next token.
void check_symbol(int expected, char *err_msg) {
    if (current_token.sym != expected) {
        error_exit(err_msg);
        return;
    }
    getsym(); // move to next token
}
/*
 * Tell whether a token code is one of the binary operators accepted by
 * expression(): anything from PLUS through SLASH other than ESUAS, or
 * anything from LOGICAL_AND through NOT_EQUAL.
 */
static int expr_is_binary_op(int sym) {
    const int arithmetic = (sym >= PLUS && sym <= SLASH && sym != ESUAS);
    const int logical = (sym >= LOGICAL_AND && sym <= NOT_EQUAL);

    return arithmetic || logical;
}

/*
 * Parse Expression
 *
 * Accepts one operand, optionally followed by any number of
 * "binary-operator expression" pairs. An operand is a unary '+', '-' or '!'
 * applied to an expression, a parenthesized expression, a number, true,
 * false, or a variable name. Only syntax is checked; no value is computed.
 * Any other leading token is reported through error_exit().
 */
void expression() {
    switch (current_token.sym) {
    case PLUS:
    case MINUS:
    case ESUAS:
        /* Unary operator */
        getsym();
        expression();
        break;
    case L_PAREN:
        /* Parenthesis */
        getsym();
        expression();
        check_symbol(R_PAREN, "Expected ')'");
        break;
    case NUMERIC:
    case REV_TRUE:
    case REV_FALSE:
    case NAME:
        /* Constant or variable */
        getsym();
        break;
    default:
        error_exit("Invalid expression start");
        break;
    }

    /* Binary operators: consume the operator, then parse its right-hand side. */
    while (expr_is_binary_op(current_token.sym)) {
        getsym();
        expression();
    }
}
/*
 * Parse Statement
 *
 * Dispatches on the current token:
 *   NAME      assignment:  name = expression ;
 *   if        if ( expression ) statement [ else statement ]
 *   while     while ( expression ) statement
 *   {         block: statements up to the matching }
 *   scanf     scanf ( "%d" , & name ) ;
 *   printf    printf ( format , name ) ;  with format "%d", "%d\n" or "%d\t"
 *   ;         empty statement
 * Any other token is reported through error_exit().
 */
void statement() {
    switch (current_token.sym) {
    case NAME:
        getsym();
        check_symbol(EQUAL, "Expected '=' for assignment");
        expression();
        check_symbol(SEMICOLON, "Expected ';'");
        break;

    case REV_IF:
        getsym();
        check_symbol(L_PAREN, "Expected '(' after if");
        expression();
        check_symbol(R_PAREN, "Expected ')' after condition");
        statement();
        /* An else directly after the inner statement belongs to this if. */
        if (current_token.sym == REV_ELSE) {
            getsym();
            statement();
        }
        break;

    case REV_WHILE:
        getsym();
        check_symbol(L_PAREN, "Expected '(' after while");
        expression();
        check_symbol(R_PAREN, "Expected ')' after condition");
        statement();
        break;

    case L_BRACKET:
        getsym();
        /* Keep parsing statements until the token is '}' or EOD. */
        while (!(current_token.sym == R_BRACKET || current_token.sym == EOD)) {
            statement();
        }
        check_symbol(R_BRACKET, "Expected '}' at end of block");
        break;

    case SCANF:
        getsym();
        check_symbol(L_PAREN, "Expected '(' after scanf");
        check_symbol(PERCENT_D, "Expected \"%d\" in scanf");
        check_symbol(COMMA, "Expected ','");
        check_symbol(AMPERSAND, "Expected '&'");
        check_symbol(NAME, "Expected variable name");
        check_symbol(R_PAREN, "Expected ')'");
        check_symbol(SEMICOLON, "Expected ';'");
        break;

    case PRINTF:
        getsym();
        check_symbol(L_PAREN, "Expected '(' after printf");
        switch (current_token.sym) {
        case PERCENT_D:
        case PERCENT_DN:
        case PERCENT_DT:
            getsym();
            break;
        default:
            error_exit("Invalid format in printf");
            break;
        }
        check_symbol(COMMA, "Expected ','");
        check_symbol(NAME, "Expected variable name in printf");
        check_symbol(R_PAREN, "Expected ')'");
        check_symbol(SEMICOLON, "Expected ';'");
        break;

    case SEMICOLON:
        getsym();
        break;

    default:
        error_exit("Unknown statement");
        break;
    }
}
/*
 * Parse Variable Definition
 *
 * Accepts:  int name { , name } ;
 * Does nothing when the current token is not 'int'. A missing name or a
 * missing ';' is reported through error_exit() / check_symbol().
 */
void var_def() {
    if (current_token.sym != DEC_INT) {
        return;
    }
    getsym();

    for (;;) {
        if (current_token.sym != NAME) {
            error_exit("Expected variable name in definition");
        } else {
            getsym();
        }
        /* A comma announces another name; anything else ends the list. */
        if (current_token.sym != COMMA) {
            break;
        }
        getsym();
    }

    check_symbol(SEMICOLON, "Expected ';' after variable definition");
}
/*
 * Parse Program
 *
 * Accepts:  int main ( ) { [variable definition] statements }
 * The fixed opening tokens are checked in order, each with its own error
 * message; then one optional variable definition; then statements until the
 * current token is '}' or EOD; finally the closing '}' is required.
 */
void program() {
    static struct {
        int sym;
        char *msg;
    } prologue[] = {
        {DEC_INT, "Program must start with 'int'"},
        {REV_MAIN, "Expected 'main' function"},
        {L_PAREN, "Expected '(' after main"},
        {R_PAREN, "Expected ')'"},
        {L_BRACKET, "Expected '{' at start of body"},
    };
    const int prologue_len = (int)(sizeof prologue / sizeof prologue[0]);
    int k;

    for (k = 0; k < prologue_len; k++) {
        check_symbol(prologue[k].sym, prologue[k].msg);
    }

    if (current_token.sym == DEC_INT) {
        var_def();
    }

    for (;;) {
        if (current_token.sym == R_BRACKET || current_token.sym == EOD) {
            break;
        }
        statement();
    }

    check_symbol(R_BRACKET, "Expected '}' at end of program");
}
// Entry point.
// Fetches the first token, runs program() unless current_token.sym is EOD,
// then prints "No error." and returns 0. Syntax errors do not come back here:
// error_exit() terminates the process.
int main() {
    int have_token;

    getsym();
    have_token = (current_token.sym != EOD);
    if (have_token) {
        program();
    }

    fputs("No error.\n", stdout);
    return 0;
}
/*
 * Upload metadata (not part of the program):
 * Uploaded by ともや
 * 2026-01-30 • 0.01 MB
 */