Assignment 2 04042021 045308pm
Assignment 2 04042021 045308pm
A scanner needs to compute as many attributes as are necessary to allow further processing.
Since the scanner will have to compute possibly several attributes for each token. It is often
helpful to collect all the attributes into a single structured data type which we could call a token
record. Such a record could be declared in C as
typedef struct {
TokenType tokenval ;
char * stringval;
Int numval;
} TokenRecord ;
A common arrangement is for the scanner to return the token value only and place the other
attributes in global variables where they can be accessed by other parts of compiler. Although the
task of scanner is to convert the entire source program into a sequence of tokens, the scanner will
rarely do this all at once. Instead the scanner will operate under the control of parser , returning
the single next token from the input on demand. So the scanner will be declared as a function
such as
TokenType getToken(void);
Regular expressions: Regular expressions represent patterns of string of characters. Patterns
recognized by scanner are defined by Regular expressions.
Reserved words and identifiers: Reserved words are the simplest to write a regular expression:
they are represented by their fixed sequence of characters. If we wanted to collect the reserved
words into one definition, we could write something like
Reserved = if | while | do | ….
Identifiers: identifiers are strings of characters which are not fixed. Typically an identifier must
begin with a letter and contain only letters and digits. We can express this in terms of regular
definitions as letter = [a-zA-Z] digit = [0-9]
identifier = letter(letter|digit)*
Numbers: Numbers can be just sequence of digits (natural numbers), or decimal numbers, or
numbers with an exponent. We can write regular definitions for these numbers as follows:
nat = [0-9]+ signedNat = (+|-)? nat number =
signedNat (“.” nat) ? (E signedNat) ?
Finite automate: Finite automata or finite-state machines are a mathematical way of describing
particular kind of algorithms. In particular, finite automata can be used to describe the process of
recognizing patterns in input strings, and so can be used to construct scanners. Finite automata
can be described using transition diagrams the following example illustrates this. The transition
diagram makes it easy to visualize the scanner algorithm and code can be written easily by hand
if the scanner it to process simple language. consider the following operators. < , <= , <> , > ,
>= , =
We can represent them as a transition diagram. Then we can write code using the transition
diagram.
Transition Table: In the above code example, the Finite automata has been hardwired right into
the code. It is also possible to express the DFA as a data structure and then write “generic” code
that will take its actions from the data structure.
A simple data structure that is adequate for this purpose is a transition table. A two dimensional
array, indexed by state and input character that expresses the values of the transition function T.
Consider the DFA for identifier:
Operators associativity
* / % multiply, divide, mod Left to right
+ - add, subtract Left to right
<< >> shift left, shift right Left to right
< <= > >= less than, less or equal, greater, greater or equal Left to right
== != equal, not equal Left to right
&& logical and Left to right
|| logical or Left to right
= += -= *= /= %= >>= <<= assignments Right to left
Tasks
#include <iostream>
#include<fstream>
#include<string>
#include<cctype>
using namespace std;
ifstream f("symbol.txt",ios::in);
char ch;
int i=0;
typedef enum {IF=1,ELSE,WHILE,DO,FOR,LPRN,RPRN,OPR,Break, Switch, Auto, Cin,Goto,
True,
False,UID,FLOAT,LCRBR,RCRBR,LSQRBR,RSQRBR,PLUSPLUS,PLUS,PLUSEQ,EQEQ,E
QUAL,Div,Not,
Mul,SEMICLN,CLN,COMMA,COMMENT,LESS,LESSEQ,GREATER,GREATEREQ,INT,ST
RING,CHAR,DOUBLE}TokenType;
struct token{
TokenType TknType;
int no;
string name;
string entry_no;
};
token tk[30];
token t;
static int a=0;
token makeToken(){
while(!f.eof()){
ch=f.get();//a
if (isalpha(ch)|| ch=='_'){
string s=" ";
i++;
do{
s=s+ch;//a
ch=f.get();//b
if(ch==' '||ch=='\n'||ch=='\t'){
t.name =s;
t.entry_no="Varriable"<<endl;
t.no=i;
tk[a]=t;
a++;
}
}
while(isalpha(ch)|| ch=='_'||isdigit(ch));
}
else if(isdigit(ch)){
string s =" ";
i++;
do{
s=s+ch;
ch=f.get();
if(ch==' '||ch=='\n'||ch=='\t'){
t.name=s;
t.entry_no="NUM";
t.no==i;
return t;
}
}
while(isdigit(ch));
}
else if(ch=='(') {
t.TknType=LPRN;
t.entry_no="LPRN";
returntokn;}
elseif(ch==')'){
t.TknType=RPRN;
t.entry_no="RPRN";
return t;
}
elseif(ch=='['){
t.TknType=LSQRBR;
t.entry_no="LSQRBR";
return t;
}
elseif(ch==']')
{
t.TknType=RSQRBR;
t.entry_no="RSQRBR";
return t;
}
elseif(ch=='{'){
t.TknType=LCRBR;
t.entry_no="LCRBR";
return t;
}
elseif(ch=='}'){
t.TknType=RCRBR;
t.entry_no="RCRBR";
return t;
}
elseif(ch==';'){
t.TknType=CLN;
t.entry_no="CLN";
return t;
}
elseif(ch==':'){
t.TknType=SEMICLN;
t.entry_no="SEMICLN";
return t;
}elseif(ch==','){
t.TknType=COMMA;
t.entry_no="COMMA";
return t;
}
elseif( ch=='/'){
if(ch=='/')
{
t.TknType=COMMENT;
t.entry_no="COMMENT";
return t;
}
elseif(ch=='*'){
t.TknType=COMMENT;
t.entry_no="COMMENT";
return t;
}
}
elseif(ch=='*'){
if( ch=='/'){
t.TknType=COMMENT;
t.entry_no="COMMENT";
return t;
}
}
elseif(ch=='+'){
if(ch=='=')
{
t.TknType=PLUSEQ;
t.entry_no="PLUSEQ";
return t;
}
elseif(ch=='+'){
t.TknType=PLUSPLUS;
t.entry_no="PLUSPLUS";
return t;
}
else{
t.TknType=PLUS;
t.entry_no="PLUS";
return t;
}
}
elseif(ch=='='){
if(ch=='='){
t.TknType=EQEQ;
t.entry_no="EQEQ";
return t;
}
else{
t.TknType=EQUAL;
t.entry_no="EQAUL";
return t;
}
}
elseif(ch=='<'){
if(ch=='='){
t.TknType=LESSEQ;
t.entry_no="LESSEQ";
return t;
}
else
{
t.TknType=EQUAL;
t.entry_no="EQUAL";
return t;
}
}
elseif(ch=='>'){
if(ch=='='){
t.TknType=GREATEREQ;
t.entry_no="GREATEREQ";
return t;
}
else{
t.TknType=GREATER;
t.entry_no="GREATER";
return t;
}
if(s=="int"){
t.TknType=INT;
t.entry_no="NULL";
return t;
}
elseif(s=="string"){
t.TknType=STRING;
t.entry_no="NULL";
return t;
}
elseif(s=="char"){
t.TknType=CHAR;
t.entry_no="NULL";
return t;
}
elseif(s=="for")
{
t.TknType=FOR;
t.entry_no="NULL";
return t;
}
elseif(s=="while"){
t.TknType=WHILE;
t.entry_no="NULL";
return t;
}
elseif(s=="do"){
t.TknType=DO;
t.entry_no="NULL";
return t;
}
elseif(s=="if"){
t.TknType=IF;
t.entry_no="NULL";
return t;
}
elseif(s=="else"){
t.TknType=ELSE;
t.entry_no="NULL";
return t;
}
elseif(s=="float"){
t.TknType=FLOAT;
t.entry_no="NULL";
return t;
}
elseif(s=="double"){
t.TknType=DOUBLE;
t.entry_no="NULL";
return t;
}
}
}
}
}
int main(){
makeToken();
for(int i=0;i<30;i++){
cout<<tk[i].TknType<<" "<<tk[i].name<<" "<<tk[i].no<<" "<<tk[i].entry_no<<endl;
}
return 0;
}