编译原理实验三:词法分析(2)

本文仅供参考 [ 不提供技术支持 ]

 一、实验目的 

    通过设计、编制并调试一个具体的词法分析程序,加深对词法分析原理的理解,并掌握在对程序设计语言源程序进行扫描的过程中将其分解为各类单词的词法分析方法。

编制一个读单词过程,从输入的源程序中,识别出各个具有独立意义的单词,即基本保留字、标识符、常数、运算符、分隔符五大类。并依次输出各个单词的类型码及单词符号的自身值。(遇到错误时可显示“Error”,然后跳过错误部分继续显示) 

二、实验要求

用C或C++写一个简单的词法分析程序,程序可以满足下列要求:

1. 能分析如下几种简单的语言词法

(1) 标识符: ID=letter(letter|digit)*

(2) 关键字(全部小写)

main int float double char if  then  else switch case break continue while do for

(3)整型常量:NUM=digit digit*

(4)运算符

   = + - * / < <= == != > >= ; ( ) ? : 

(5)空白符由空格、制表符和换行符组成,用以分隔ID、NUM、运算符等,词法分析时被忽略。

2. 单词符号和相应的类别码  (自定) 

以下是转载的一些内容:

https://blog.csdn.net/just_a_new_life/article/details/80895433

https://blog.csdn.net/baidu_41774120/article/details/89350050

https://blog.csdn.net/baidu_41774120/article/details/89640967?utm_medium=distribute.pc_relevant.none-task-blog-baidujs-1

https://www.cnblogs.com/xukai98/p/11106545.html

#include <iostream>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#define _KEY_WORDEND "waiting for your expanding"
using namespace std;
// A token as a pair (category code, spelling).
// `word` is a non-owning pointer: the scanner points it either at a string
// literal or at the global `token` buffer. It is declared const char* because
// binding a string literal to a plain char* is ill-formed since C++11.
typedef struct
{
	int typenum;       // token category code
	const char * word; // token spelling (not owned by this struct)
}WORD;
char input[255];      // text of the segment currently being scanned
char token[255] = ""; // spelling of the identifier/number being assembled
int p_input;          // index of the next unread character in input
int p_token;          // index of the next free slot in token
char ch;              // character most recently read from input
// Keyword table, terminated by the _KEY_WORDEND sentinel. The entries are
// string literals, so the element type is const char* (a literal cannot be
// bound to a plain char* since C++11).
const char * rwtab[] = { "begin","if","then","while","do","end","int","main",
                        "else","float","double","return","cout",_KEY_WORDEND };

WORD * scanner(); // lexical scanner, defined below
 
int main()
{
	int over = 1;
	WORD* oneword = new WORD;
 
	//实现从文件读取代码段
	cout << "read something from data.txt" << endl;
	FILE *fp;
	if((fp=freopen("data.txt","r",stdin))==NULL)
        {
                printf("Not found file!\n");
                return 0;
        }
        else
        {
                while ((scanf("%[^#]s", &input)) != EOF)
                {
                        p_input = 0;
                        printf("your words:\n%s\n", input);
                        while (over < 1000 && over != -1)
                        {
                                oneword = scanner();
                                if (oneword->typenum < 1000)
                                {
                                        if(oneword->typenum != 999)
                                                cout << "[  "<< oneword->typenum <<"\t"<< oneword->word <<"  ]"<< endl;
                                }
                                over = oneword->typenum;
                        }
                        scanf("%[^#]s", input);
                }
        }
    return 0;
}
 
//Fetch the next character of the input buffer into ch, advance the read
//index, and return the character
char m_getch()
{
	return ch = input[p_input++];
}
 
//Skip whitespace: while ch is a blank, tab, newline or carriage return, read
//the next character of the input buffer into ch. On return ch holds the first
//non-whitespace character (possibly the terminating '\0').
void getbc()
{
	while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')
	{
		ch = input[p_input];
		p_input++;
	}
}
 
//Append ch to the token buffer and keep the buffer NUL-terminated.
//Characters that would not fit (one slot is reserved for the terminator) are
//dropped instead of being written past the end of the array.
void concat()
{
	if (p_token >= static_cast<int>(sizeof(token)) - 1)
		return;
	token[p_token] = ch;
	p_token++;
	token[p_token] = '\0';
}
 
//Return 1 when the current character ch is an ASCII letter, otherwise 0
int letter()
{
	const bool lower = (ch >= 'a' && ch <= 'z');
	const bool upper = (ch >= 'A' && ch <= 'Z');
	return (lower || upper) ? 1 : 0;
}
 
//Return 1 when the current character ch is a decimal digit, otherwise 0
int digit()
{
	return (ch < '0' || ch > '9') ? 0 : 1;
}
 
//Look the current token up in the keyword table.
//Returns the keyword's category code, or 10 (the identifier code) when the
//token is not a keyword. Keyword codes follow the table order starting at 1,
//but skip 10 so that no keyword shares its code with identifiers (with a
//plain index+1 numbering the tenth keyword, "float", would be reported as 10).
int reserve()
{
	const int kIdentifierCode = 10;
	for (int i = 0; strcmp(rwtab[i], _KEY_WORDEND) != 0; i++)
	{
		if (strcmp(rwtab[i], token) == 0)
		{
			int code = i + 1;
			return (code >= kIdentifierCode) ? code + 1 : code;
		}
	}
	return kIdentifierCode; //not a keyword
}
 
//Step the read index back by one so the last character is read again
void retract()
{
	--p_input;
}
 
//Lexical scanner: reads one token from the input buffer, starting at p_input.
//Returns a newly allocated WORD; the caller is responsible for deleting it.
//Its `word` points at a string literal or at the global token buffer, so it
//is only valid until the next call.
//Category codes: keywords -> value of reserve(), identifier 10, number 20,
//operators and delimiters 18 and 21..42, comment 999, '#' 0, end of input
//1000, lexical error -1.
WORD * scanner()
{
	WORD * myword = new WORD;
	myword->typenum = 10;  //default
	myword->word = "";
	p_token = 0;           //restart the token buffer
	m_getch();
	getbc();               //skip leading whitespace

	if (letter())          //identifier or keyword: letter (letter|digit)*
	{
		while (letter() || digit())
		{
			concat();
			m_getch();
		}
		retract();         //give back the character that ended the word
		myword->typenum = reserve();
		myword->word = token;
		return myword;
	}
	if (digit())           //integer constant: digit digit*
	{
		while (digit())
		{
			concat();
			m_getch();
		}
		retract();
		myword->typenum = 20;
		myword->word = token;
		return myword;
	}

	switch (ch)
	{
	case '=':
		m_getch();         //look ahead to tell "==" from "="
		if (ch == '=')
		{
			myword->typenum = 39;
			myword->word = "==";
			return myword;
		}
		retract();
		myword->typenum = 21;
		myword->word = "=";
		return myword;
	case '+':
		myword->typenum = 22;
		myword->word = "+";
		return myword;
	case '-':
		myword->typenum = 23;
		myword->word = "-";
		return myword;
	case '/':
		m_getch();         //look ahead to tell a comment from a division sign
		if (ch == '*')
		{
			//Skip to the closing "*/". `prev` holds the previous character
			//of the comment body, so "/*/" is not mistaken for a complete
			//comment and runs of '*' such as "**/" are handled.
			char prev = '\0';
			m_getch();
			while (ch != '\0' && !(prev == '*' && ch == '/'))
			{
				prev = ch;
				m_getch();
			}
			if (ch == '\0')
			{
				//Unterminated comment: stop at the end of the buffer rather
				//than reading past it, and report a lexical error.
				retract();
				myword->typenum = -1;
				myword->word = "ERROR";
				return myword;
			}
			myword->typenum = 999;
			myword->word = "注释";
			return myword;
		}
		retract();
		myword->typenum = 25;
		myword->word = "/";
		return myword;
	case '*':
		myword->typenum = 24;
		myword->word = "*";
		return myword;
	case '(':
		myword->typenum = 26;
		myword->word = "(";
		return myword;
	case ')':
		myword->typenum = 27;
		myword->word = ")";
		return myword;
	case '[':
		myword->typenum = 28;
		myword->word = "[";
		return myword;
	case ']':
		myword->typenum = 29;
		myword->word = "]";
		return myword;
	case '{':
		myword->typenum = 30;
		myword->word = "{";
		return myword;
	case '}':
		myword->typenum = 31;
		myword->word = "}";
		return myword;
	case ',':
		myword->typenum = 32;
		myword->word = ",";
		return myword;
	case ':':
		m_getch();
		if (ch == '=')
		{
			myword->typenum = 18;
			myword->word = ":=";
			return myword;
		}
		retract();
		myword->typenum = 33;
		myword->word = ":";
		return myword;
	case ';':
		myword->typenum = 34;
		myword->word = ";";
		return myword;
	case '>':
		m_getch();
		if (ch == '=')
		{
			myword->typenum = 37;
			myword->word = ">=";
			return myword;
		}
		retract();
		myword->typenum = 35;
		myword->word = ">";
		return myword;
	case '<':
		m_getch();
		if (ch == '=')
		{
			myword->typenum = 38;
			myword->word = "<=";
			return myword;
		}
		if (ch == '<')
		{
			myword->typenum = 42;
			myword->word = "<<";
			return myword;
		}
		retract();
		myword->typenum = 36;
		myword->word = "<";
		return myword;
	case '!':
		m_getch();
		if (ch == '=')
		{
			myword->typenum = 40;
			myword->word = "!=";
			return myword;
		}
		retract();         //a lone '!' is not a token of this language
		myword->typenum = -1;
		myword->word = "ERROR";
		return myword;
	case '"':              //a single double-quote character
		myword->typenum = 41;
		myword->word = " \" ";
		return myword;
	case '\0':             //end of the input buffer
		myword->typenum = 1000;
		myword->word = "OVER";
		return myword;
	case '#':
		myword->typenum = 0;
		myword->word = "#";
		return myword;
	default:
		myword->typenum = -1;
		myword->word = "ERROR";
		return myword;
	}
}
 
 
#include <iostream>
#include <string.h>
using namespace std;
//Keyword table, one keyword per row
char key[6][20] = {
	"begin",
	"if",
	"then",
	"while",
	"do",
	"end"
};
//Buffer holding the spelling of the token scanned most recently
char token[20];
//Look s up in the keyword table.
//Returns the keyword's 1-based position (begin=1, if=2, then=3, while=4,
//do=5, end=6), or -1 when s is not a keyword.
int isKey(char s[])
{
	int idx = 0;
	while(idx < 6 && strcmp(s,key[idx]) != 0)
	{
		++idx;
	}
	return (idx < 6) ? idx + 1 : -1;
}
//True when ch is an ASCII letter
bool isChar(char ch)
{
	const bool lower = (ch>='a' && ch<='z');
	const bool upper = (ch>='A' && ch<='Z');
	return lower || upper;
}
//True when ch is a decimal digit
bool isNum(char ch)
{
	return !(ch<'0' || ch>'9');
}
//Core routine: scans one token of s starting at index p.
//  syn - out: category code of the token. Keywords 1..6, identifier 10,
//        number 11, operators/delimiters 13..28, '#' 0, and -1 when the
//        character is not part of the language or the string has ended.
//  p   - in/out: index into s; on return it is just past the token.
//  s   - NUL-terminated input string.
//The token's spelling is left in the global `token` buffer. Spellings longer
//than the buffer are truncated; the whole lexeme is still consumed.
void scanner(int &syn,int &p,char s[])
{
	const int maxLen = static_cast<int>(sizeof(token)) - 1;
	int count = 0;

	//skip leading whitespace
	while(s[p] == ' ' || s[p] == '\t' || s[p] == '\n') p++;

	//nothing left: report "no token" without stepping past the terminator
	if(s[p] == '\0')
	{
		token[0] = '\0';
		syn = -1;
		return;
	}

	//starts with a letter: keyword or identifier letter(letter|digit)*
	if(isChar(s[p]))
	{
		while(isNum(s[p]) || isChar(s[p]))
		{
			if(count < maxLen) token[count++] = s[p];
			p++;
		}
		token[count] = '\0';
		syn = isKey(token);
		if(syn == -1)
		{
			syn = 10;
		}
		return;
	}

	//starts with a digit: number digit(digit)*
	if(isNum(s[p]))
	{
		while(isNum(s[p]))
		{
			if(count < maxLen) token[count++] = s[p];
			p++;
		}
		token[count] = '\0';
		syn = 11;
		return;
	}

	//operator, delimiter or unknown character; s[p] is not '\0' here, so
	//peeking at s[p+1] stays inside the string
	token[count++] = s[p];
	switch(s[p])
	{
		case '+': syn = 13; break;
		case '-': syn = 14; break;
		case '*': syn = 15; break;
		case '/': syn = 16; break;
		case '=': syn = 25; break;
		case ';': syn = 26; break;
		case '(': syn = 27; break;
		case ')': syn = 28; break;
		case '#': syn = 0 ; break;
		case ':':	//: :=
			if(s[p+1] == '=')
			{
				token[count++] = s[++p];
				syn = 18;
			}
			else
			{
				syn = 17;
			}
			break;
		case '<':	//< <> <=
			if(s[p+1] == '>')
			{
				token[count++] = s[++p];
				syn = 21;
			}
			else if(s[p+1] == '=')
			{
				token[count++] = s[++p];
				syn = 22;
			}
			else
			{
				syn = 20;
			}
			break;
		case '>':	//> >=
			if(s[p+1] == '=')
			{
				token[count++] = s[++p];
				syn = 24;
			}
			else
			{
				syn = 23;
			}
			break;
		default:	//not part of the language: report it instead of leaving
				//syn and token holding the previous token's values
			syn = -1;
			break;
	}
	token[count] = '\0';
	p++;	//step past the last character of the operator
}
//Entry point: reads whitespace-separated words from standard input and prints
//every token of each word as <spelling,code>.
int main()
{
	char s[100];	//input word
	for(;;)
	{
		//Limit the extraction to sizeof(s)-1 characters so a long word cannot
		//overflow s. The width is reset by every extraction, so it is set
		//again on each iteration.
		cin.width(sizeof(s));
		if(!(cin>>s)) break;

		int p = 0;
		int syn = -1;
		const int len = static_cast<int>(strlen(s));
		while(p < len)	//until p reaches the end of the word
		{
			scanner(syn,p,s);
			cout<<'<'<<token<<','<<syn<<'>'<<endl;
		}
	}
	return 0;
}
#include<iostream>
#include<fstream>
#include<cstdio>
#include<cstring>
#include<string>
#include<cstdlib>

using namespace std;

int aa=0;                 //receives the return value of fseek in scan()
string word;              //spelling of the token being assembled
string reserved_word[20]; //reserved-word table
char buffer='\0';         //the character read most recently
int num=0;                //position of the current character within a word
int line=1;               //line number
int row=1;                //column number, i.e. position within the line
bool flag=false;          //main() scans while this is 1; scan() clears it at end of file
int flag2=0;              //type of the token returned by scan()


//Append one separating space to out unless out is empty or already ends
//with a space
static void append_separator(std::string &out)
{
 if(!out.empty() && out[out.size()-1]!=' ')
  out+=' ';
}

//Preprocessor: reads code.txt, removes // and /* */ comments, collapses every
//run of whitespace (spaces, tabs, line breaks) and comments into a single
//space, drops leading whitespace, and writes the result to afterdel.txt.
//A status message is printed to stdout. Always returns 0.
int processor(){
 FILE *p=fopen("code.txt","rt");
 if(p==NULL){
  printf("无法打开要编译的源程序");
  return  0;
 }

 //Read the whole file. c is an int, not a char, so that EOF stays
 //distinguishable from every byte value; the string grows as needed, so the
 //file size is not limited.
 std::string src;
 int c;
 while((c=getc(p))!=EOF){
  src+=static_cast<char>(c);
 }
 fclose(p);

 std::string out;
 const std::size_t n=src.size();
 std::size_t i=0;
 while(i<n){
  const char cur=src[i];
  const char next=(i+1<n)?src[i+1]:'\0';
  if(cur=='/'&&next=='/'){
   //single-line comment: skip up to (not including) the line break
   i+=2;
   while(i<n&&src[i]!='\n'){i++;}
   append_separator(out);
  }
  else if(cur=='/'&&next=='*'){
   //block comment: skip through the closing "*/", or to the end of the
   //text when the comment is never closed
   i+=2;
   while(i<n&&!(src[i]=='*'&&i+1<n&&src[i+1]=='/')){i++;}
   i=(i<n)?i+2:n;
   append_separator(out);
  }
  else if(cur==' '||cur=='\t'||cur=='\n'||cur=='\r'){
   i++;
   append_separator(out);
  }
  else{
   out+=cur;
   i++;
  }
 }

 if((p=fopen("afterdel.txt","w"))==NULL){
  printf("can not find it!");
  return 0;
 }
 //fputs returns a non-negative value on success and EOF on failure
 if(fputs(out.c_str(),p)==EOF){
  printf("预处理失败!");
 }
 else printf("预处理成功!");
 fclose(p);
 return 0;
}

 //设置保留字
//Fill slots 1..12 of the reserved-word table (slot 0 is left untouched)
void set_reserve()
{
    static const char *const kWords[12] = {
        "return", "def", "if", "else", "while", "return",
        "char", "for", "and", "or", "int", "bool"
    };
    for(int k=0;k<12;k++){
        reserved_word[k+1]=kWords[k];
    }
}

//True when x is an ASCII letter
bool judge_word(char x)
{
    const bool lower = (x>='a' && x<='z');
    const bool upper = (x>='A' && x<='Z');
    return lower || upper;
}

//True when x is a decimal digit
bool judge_number(char x)
{
    return !(x<'0' || x>'9');
}

//True when x is one of the delimiters ( ) , ; { }
bool judge_jiefu(char x)
{
    switch(x){
        case '(':
        case ')':
        case ',':
        case ';':
        case '{':
        case '}':
            return true;
        default:
            return false;
    }
}


//True when x is one of the operators + - *
bool judge_yunsuanfu1(char x)
{
    switch(x){
        case '+':
        case '-':
        case '*':
            return true;
        default:
            return false;
    }
}

//True when x can start a comparison/assignment/logical operator:
//one of = > < & |
//(The '|' test must use a single-character literal: a multi-character
//constant such as '||' has an implementation-defined int value that no char
//compares equal to.)
bool judge_yunsuannfu2(char x)
{
    return x=='=' || x=='>' || x=='<' || x=='&' || x=='|';
}


//这个最大的函数的总体作用是从文件里读一个单词
int scan(FILE *fp)
{
    buffer=fgetc(fp);//读取一个字符
    if(feof(fp)){//检测结束符
        flag=0;return 0;
    }
    else if(buffer==' ')
    {
        row++;
        return 0;
    }
    else if(buffer=='\n')
    {
        row=1;
        return 0;
    }
    //如果是字母开头或'_' 看关键字还是普通单词
    else if(judge_word(buffer) || buffer=='_')
    {
        word+=buffer;
        row++;
        while((buffer=fgetc(fp)) && (judge_word(buffer) || judge_number(buffer) || buffer=='_'))
        {
            word+=buffer;
            row++;
        }
        if(feof(fp)){
                flag=0;
                return 1;
        }
        for(int i=1;i<=12;i++){
            if(word==reserved_word[i]){
                aa=fseek(fp,-1,SEEK_CUR);//如果执行成功,stream将指向以fromwhere为基准,偏移offset(指针偏移量)个字节的位置,函数返回0。
                return 3;
            }
        }
        aa=fseek(fp,-1,SEEK_CUR);
        return 1;
    }

    //开始是加减乘 一定是类型4
    else if(judge_yunsuanfu1(buffer))
    {
        word+=buffer;
        row++;
        return 4;
    }

    //开始是数字就一定是数字
    else if(judge_number(buffer))
    {
        word+=buffer;
        row++;
        while((buffer=fgetc(fp)) && judge_number(buffer))
        {
            word+=buffer;
            row++;
        }
        if(feof(fp)){
            flag=0;
            return 2;
        }
        aa=fseek(fp,-1,SEEK_CUR);
        return 2;
    }

    //检验界符
    else if(judge_jiefu(buffer))
    {
        word+=buffer;
        row++;
        return 6;
    }

    //检验 <=、  >=、  <>、  == =、 <、>
    else if(judge_yunsuannfu2(buffer))
    {
        row++;
        word+=buffer;
        if(buffer=='<')   //为了检验题目中的<> <=
        {
            buffer=fgetc(fp);
            if(buffer=='>' || buffer=='=')
            {
                word+=buffer;
                row++;
                return 5;
            }
        }
        //检验  >= ==
        else{
            buffer=fgetc(fp);
            if(buffer=='=')
            {
                word+=buffer;
                row++;
                return 5;
            }
        }
        if(feof(fp)){
                flag=0;
        }
        aa=fseek(fp,-1,SEEK_CUR);
        return 4;
    }

    //首字符是/ 有可能是除号 也有可能是注释
    else if(buffer=='/')
    {
        row++;
        word+=buffer;
        buffer=fgetc(fp);
        aa=fseek(fp,-1,SEEK_CUR);
            return 4;
    }

    else {
        word+=buffer;
        row++;
        return -1;
    }
}

//Entry point: fills the reserved-word table, preprocesses code.txt into
//afterdel.txt, then prints every token of afterdel.txt with its type. Finally
//waits until the user enters 'e'.
int main()
{
    set_reserve();
    processor();
    cout<<"open "<<"afterdel.txt"<<endl;
    flag=1;
    FILE *fp=fopen("afterdel.txt","r");
    if(!fp)
    {
        cout<<"not found the file or other error "<<endl;
        flag=0;
    }

    while(flag==1)
    {
        flag2=scan(fp);   //extract the next token and get its type

        const char *label=0;
        switch(flag2)
        {
            case 1:  label="type:1 identifier      "; break;
            case 2:  label="type:2 positive number "; break;
            case 3:  label="type:3 reserved word   "; break;
            case 4:  label="type:4 unary_operator  "; break;
            case 5:  label="type:5 double_operator "; break;
            case 6:  label="type:6 Separator       "; break;
            case -1: label="Illegal character      "; break;
            default: break;   //0: whitespace or end of file, nothing to print
        }
        if(label)
        {
            cout<<label<<word<<endl;
            if(flag2==1 && word.length()>20)
                cout<<"ERROR Identifier length cannot exceed 20 characters"<<endl;
            word.clear();
        }
    }

    //fp is NULL when the file could not be opened; fclose must not see NULL
    if(fp)
        fclose(fp);

    cout<<"press e to close"<<endl;
    char end;
    while(cin>>end && end!='e'){
        cout<<"只有e可以关闭"<<endl;
    }
    return 0;
}

付费一次, 以下几个内容(另外3份代码)全部可见 [ 不提供技术支持 ]

© 版权声明
THE END
喜欢就支持一下吧
点赞0赞赏
分享
评论 抢沙发

请登录后发表评论