C语言词法分析器中.clear()和.insert()两处出现问题

邢栋 发布于 2015/04/20 21:01
阅读 485
收藏 0

【Gopher China万字分享】华为云的Go语言云原生实战经验!>>>

#include <iostream>
#include <fstream>
#include <cctype>
#include <string>
using namespace std;
 
 class SourceCode{
 private:
    string code;
    int index;
    int length;
 public:
    SourceCode()
 {
       code="";
       index=0;
       length=0;
    }
 
    SourceCode(string code){
        this->code=code;
        index=0;
        length=code.length();
    }
 
    void setCode(string code){
        this->code=code;
        index=0;
        length=code.length();
    }
 
    string getCode(){
        return code;
    }
 
    int getIndex(){
        return index;
    }
 
    char nextChar(){
        return code[index++];
    }
 
    char getPreChar(){
        return code[index-1];
    }
 
    void retract(){
        index--;
    }
 
    bool end(){
        return index>=length;
    }
};
 
// Global read cursor shared by the scanning helpers; main() fills it through setCode().
SourceCode sourceCode;
// The 32 C keywords. isKeyWords() returns an entry's index and scaner() uses
// that index as the token code, so the order and the count (isKeyWords()
// scans indices 31..0) must not change.
std::string keyWords[]={ "auto","short","int","long","float","double","char","struct","union","enum",
                    "typedef","const","signed","unsigned","extern","register","static","volatile","void",
                    "if","else","switch","case","for","do","while","goto","continue","break",
                    "default","sizeof","return"};
 
// Directive names accepted after '#'. scaner() reports a directive as
// 82 + its index in this table, and isPrecompiledDirectives() scans
// indices 8..0, so both order and count are significant.
std::string precompiledDirectives[]={
    "include",
    "define",
    "undef",
    "if",
    "ifdef",
    "ifndef",
    "elif",
    "endif",
    "error"
};
 char getbc(){
    char ch = sourceCode.nextChar();
    while(!sourceCode.end() && isspace(ch)){
        ch = sourceCode.nextChar();
    }
    if(!isspace(ch)) return ch;
    else return -1;
}
 char getbcBuLFandCR(){
    char ch = sourceCode.nextChar();
    while(!sourceCode.end() && isspace(ch) && ch!='\r' && ch!='\n'){
        ch = sourceCode.nextChar();
    }
    if(sourceCode.end()) return -1;
    else return ch;
}
 bool letter(char ch){
    return ch>='a' && ch <='z' || ch>='A' && ch<='Z';
}
 bool digit(char ch){
    return ch>='0' && ch <='9';
}
 bool digitOctonary(char ch){
    return ch>='0' && ch<='7';
}
 bool digitHexadecimal(char ch){
    return ch>='0' && ch<='9' || ch>='a' && ch<='f' || ch>='A' && ch<='F';
}
 int HexToDec(string s){
    int result=0;
    for(int i=0;s[i]!='\0';i++){
        if(s[i]>='0' && s[i]<='9')
            result=result*16+(s[i]-'0');
        else if(s[i]>='a' && s[i]<='f')
            result=result*16+(s[i]-'a');
        else if(s[i]>='A' && s[i]<='Z')
            result=result*16+(s[i]-'A');
        else
            return -1;
    }
    return result;
}
 int OctToDec(string s){
    int result=0;
    for(int i=0;s[i]!='\0';i++){
        if(s[i]>='0' && s[i]<='7')
            result=result*8+(s[i]-'0');
        else
            return -1;
    }
    return result;
}
 int isKeyWords(string &s){
    for(int i=31;i>=0;i--){
        if(keyWords[i].compare(s)==0)
            return i;
    }
    return 32;
}
 int isPrecompiledDirectives(string &s){
    for(int i=8;i>=0;i--){
        if(precompiledDirectives[i].compare(s)==0)
            return i;
    }
    return -1;
}
 int isSpecialChar(char ch){
    return ch=='n' || ch=='r' || ch=='t' || ch=='v' || ch=='a' || ch=='b' || ch=='f'
        || ch=='\'' || ch=='\"' || ch=='\\' || ch=='?';
}
 // Decodes one escape sequence, reading from sourceCode starting at the
 // character that follows the backslash (scaner() calls this right after it
 // has consumed the '\').
 //
 // Returns the decoded character, or one of two sentinels:
 //   -1  "\x" / "\X" was not followed by a hexadecimal digit; both reads are
 //       undone, leaving the cursor on the x.
 //   -2  the character is not a recognised escape; the read is undone so the
 //       caller can fetch that character again.
 // NOTE(review): the sentinels share the char return type with real values,
 // so where plain char is signed an escape such as \377 or \xFF is
 // indistinguishable from -1 for the caller.
 char getESC(){
    char ch = sourceCode.nextChar();
    string s;   // collects the digits of a numeric escape
 
    switch(ch){
        // Single-character escapes map directly to their character value.
        case'n':    return '\n';
        case'r':    return '\r';
        case't':    return '\t';
        case'v':    return '\v';
        case'a':    return '\a';
        case'b':    return '\b';
        case'f':    return '\f';
        case'\'':   return '\'';
        case'\"':   return '\"';
        case'\\':   return '\\';
        case'?':    return '\?';
        // Hexadecimal escape: one or two hex digits after the x.
        case'x':
        case'X':
            ch = sourceCode.nextChar();
            if(digitHexadecimal(ch)){
                s.append(1,ch);
                ch=sourceCode.nextChar();
                if(digitHexadecimal(ch))
                    s.append(1,ch);
                else
                    sourceCode.retract();   // second character is not part of the escape
                return (char)HexToDec(s);
            }
            else{
                // Undo the reads of both the non-digit and the x itself.
                sourceCode.retract();
                sourceCode.retract();
                return -1;
            }
    }
    // Octal escape: one to three octal digits.
    if(digitOctonary(ch)){
         
        // NOTE(review): this condition can never hold here, because the
        // enclosing if has already established digitOctonary(ch).
        if(sourceCode.getPreChar()=='0' && !digitOctonary(ch)) return '\0';
 
        s.append(1,ch);
        ch=sourceCode.nextChar();
 
        // Take at most two further octal digits.
        for(int i=0;i<2 && digitOctonary(ch);i++){
            s.append(1,ch);
            ch=sourceCode.nextChar();
        }
        sourceCode.retract();   // the last character read is not part of the escape
        return (int)OctToDec(s);
    }
 
    // Unknown escape: hand the character back to the caller.
    sourceCode.retract();
    return -2;
}
 int scaner(string &token){
    token.clear();
 char ch = getbc();
    if(ch=='_' || letter(ch)){
 
        while(ch=='_' || letter(ch) || digit(ch)){
            token.append(1,ch);
            ch = sourceCode.nextChar();
        }
 
        sourceCode.retract();
        int c=isKeyWords(token);
        if(c!=32) return c;
        else return 32;
    }
 
    if(digit(ch)){
 
        while(digit(ch)){
            token.append(1,ch);
            ch = sourceCode.nextChar();
        }
 
        sourceCode.retract();
        return 33;
    }
 
    switch(ch){
        case'(':    token.append(1,ch);return 34;
        case')':    token.append(1,ch);return 35;
        case'[':    token.append(1,ch);return 36;
        case']':    token.append(1,ch);return 37;
        case'.':    token.append(1,ch);return 38;
        case'~':    token.append(1,ch);return 39;
        case'?':    token.append(1,ch);return 40;
        case':':    token.append(1,ch);return 41;
        case',':    token.append(1,ch);return 42;
        case';':    token.append(1,ch);return 43;
        case'{':    token.append(1,ch);return 44;
        case'}':    token.append(1,ch);return 45;
    }
 
    if(ch=='!'){
        token.append(1,ch);
        ch=sourceCode.nextChar();
        if(ch=='='){
            token.append(1,ch);
            return 47;
        }
        sourceCode.retract();
        return 46;
    }
 
    if(ch=='-'){
        token.append(1,ch);
        ch=sourceCode.nextChar();
        switch(ch){
            case'>': token.append(1,ch);return 49;
            case'-':    token.append(1,ch);return 50;
            case'=':    token.append(1,ch);return 51;
            default:    sourceCode.retract();return 48;
        }
    }
 
    if(ch=='+'){
        token.append(1,ch);
        ch=sourceCode.nextChar();
        switch(ch){
            case'+':    token.append(1,ch);return 53;
            case'=':    token.append(1,ch);return 54;
            default:    sourceCode.retract();return 52;
        }
    }
 
    if(ch=='*'){
        token.append(1,ch);
        ch=sourceCode.nextChar();
        if(ch=='='){
            token.append(1,ch);
            return 56;
        }
        else{
            sourceCode.retract();
            return 55;
        }
    }
 
    if(ch=='&'){
        token.append(1,ch);
        ch=sourceCode.nextChar();
        switch(ch){
            case'&':    token.append(1,ch);return 58;
            case'=':    token.append(1,ch);return 59;
            default:    sourceCode.retract();return 57;
        }
    }
 
    if(ch=='%'){
        token.append(1,ch);
        ch=sourceCode.nextChar();
        if(ch=='='){
            token.append(1,ch);
            return 61;
        }
        else{
            sourceCode.retract();
            return 60;
        }
    }
 
    if(ch=='<'){
        token.append(1,ch);
        ch=sourceCode.nextChar();
        switch (ch){
            case'=':    token.append(1,ch);return 63;
            case'<':
                token.append(1,ch);
                ch=sourceCode.nextChar();
                if(ch=='='){
                    ch=sourceCode.nextChar();
                    return 65;
                }
                else{
                    sourceCode.retract();
                    return 64;
                }
            default:    sourceCode.retract();return 62;
        }
    }
 
    if(ch=='>'){
        token.append(1,ch);
        ch=sourceCode.nextChar();
        switch (ch){
            case'=':    token.append(1,ch);return 67;
            case'>':
                token.append(1,ch);
                ch=sourceCode.nextChar();
                if(ch=='='){
                    ch=sourceCode.nextChar();
                    return 69;
                }
                else{
                    sourceCode.retract();
                    return 68;
                }
            default:    sourceCode.retract();return 66;
        }
    }
 
    if(ch=='='){
        token.append(1,ch);
        ch=sourceCode.nextChar();
        if(ch=='='){
            token.append(1,ch);
            return 71;
        }
        else{
            sourceCode.retract();
            return 70;
        }
    }
 
    if(ch=='|'){
        token.append(1,ch);
        ch=sourceCode.nextChar();
        switch(ch){
            case'|':    token.append(1,ch);return 73;
            case'=':    token.append(1,ch);return 74;
            default:    sourceCode.retract();return 72;
        }
    }
 
    if(ch=='^'){
        token.append(1,ch);
        ch=sourceCode.nextChar();
        if(ch=='='){
            token.append(1,ch);
            return 76;
        }
        else{
            sourceCode.retract();
            return 75;
        }
    }
 
    if(ch=='/'){
        token.append(1,ch);
        ch=sourceCode.nextChar();
        switch (ch)
        {
            case'=':    token.append(1,ch);return 78;
            case'/':
                while(ch!='\r' && ch!='\n' && !sourceCode.end()){
                    token.append(1,ch);
                    ch=sourceCode.nextChar();
                }
                sourceCode.retract();
                return 79;
            case'*':
                sign1:
                while(ch!='*' && !sourceCode.end()){
                    token.append(1,ch);
                    ch=sourceCode.nextChar();
                }
                if(sourceCode.end())
                    return -1;
                else{
                    token.append(1,ch);
                    ch=sourceCode.nextChar();
                    if(ch!='/'){
                    goto sign1;
                    }
                    else{
                        token.append(1,ch);
                        return 80;
                    }
                }
            default:    sourceCode.retract();return 77;
        }
    }
 
    if(ch=='#'){
        ch=getbcBuLFandCR();
        if(ch=='\r' || ch=='\n'){
            token.append(1,'#');
            return 81;
        }
 
        while(letter(ch)){
            token.append(1,ch);
            ch=sourceCode.nextChar();
        }
 
        sourceCode.retract();
 
        int c=isPrecompiledDirectives(token);
 
        if(c==-1)   return -1;
        else{
          token.insert(0,1,'#');
            return c+82;
        }
    }
 
    if(ch=='\''){
        ch=sourceCode.nextChar();
        if(ch=='\'') return 91;
        if(ch!='\\'){
            token.append(1,ch);
            ch=sourceCode.nextChar();
            if(ch=='\'')
                return 91;
            else{
                sourceCode.retract();
                return -1;
            }
        }
 
        ch=getESC();
        if(ch==-1 || ch==-2) return -1;
 
        token.append(1,ch);
        ch=sourceCode.nextChar();
 
        if(ch=='\'') return 91;
        else return -1;
    }
 
    if(ch=='\"'){
        sign2:
        ch=sourceCode.nextChar();
        while(!sourceCode.end() && ch!='\"' && ch!='\\' && ch!='\r' && ch!='\n'){
            token.append(1,ch);
            ch=sourceCode.nextChar();
        }
        if(sourceCode.end()) return -1;
        if(ch=='\\'){
            ch=getESC();
            if(ch==-1) return -1;
            if(ch==-2) ch=sourceCode.nextChar();    //非致命转义,忽略反斜杠             token.append(1,ch);
            goto sign2;
        }
        if(ch=='\"') return 92;
    }
 
    return -1;
}
 int main(){
    fstream input;
    char inpath[100];
    char outpath[100];
    string s,token;
 
    cout << "Please enter the source file's path:";
    cin >> inpath;
    getchar();
 
    input.open(inpath,ios::in);
     
    char ch=input.get();
    while(!input.eof()){
        s.append(1,ch);
        ch = input.get();
    }
 
    sourceCode.setCode(s);
 
    //cout << s;
    while(!sourceCode.end())
    {
        cout << (scaner(token)) << "\t\t" ;
        cout << token << endl;
    }
 
    getchar();
    return 0;
}

C语言词法分析器:在 VC++ 6.0 运行环境下,为什么 .clear() 和 .insert() 两处会出问题?求大神解答,劳烦加我 QQ 390618277。

加载中
0
中山野鬼
中山野鬼
你改成c语言,我就帮你看看。c++的。。。。。哈。
中山野鬼
中山野鬼
回复 @邢栋 : 你写成c代码,我可以帮你看看。哈。
邢栋
邢栋
你上去帮我调下吧 我邮箱是390618277@qq.com
0
邢栋
邢栋
你上去调一下 问题不大的
返回顶部
顶部