psproc源码阅读 - 2

main中接下来的函数都比较重要,所以这里就分段来介绍了。

arg_parse(argc,argv);

/* check for invalid combination of arguments */
arg_check_conflicts();

首先是arg_parse。

/*
 * Parse the whole command line.
 *
 * argc/argv are stored in the parser globals (ps_argc, ps_argv) and the
 * cursor thisarg is set to 0 before each pass.
 *
 * Pass 1 runs the four parsing stages in order; the first stage that
 * returns an error string aborts the pass and jumps to the BSD retry.
 * Pass 1 is skipped entirely when personality has PER_FORCE_BSD set.
 *
 * Pass 2 (label try_bsd) resets the parser state, sets force_bsd and
 * prefer_bsd_defaults, and runs the same four stages again.
 *
 * Returns 0 when either pass succeeds.  When pass 2 fails as well, an
 * error message is written to stderr and do_help() is called with
 * EXIT_FAILURE.
 * NOTE(review): there is no return statement after do_help(); presumably
 * do_help() does not return when given an exit code -- confirm.
 */
int arg_parse(int argc, char *argv[]) {
    const char *err = NULL;   /* error from pass 1; stays NULL if pass 1 is skipped */
    const char *err2 = NULL;  /* error from pass 2 (BSD retry) */
    ps_argc = argc;
    ps_argv = argv;
    thisarg = 0;

    /* Personality demands BSD syntax: go straight to pass 2. */
    if(personality & PER_FORCE_BSD) goto try_bsd;

    /* Pass 1: any failing stage falls back to the BSD retry. */
    err = parse_all_options();
    if(err) goto try_bsd;
    err = thread_option_check();
    if(err) goto try_bsd;
    err = process_sf_options();
    if(err) goto try_bsd;
    err = select_bits_setup();
    if(err) goto try_bsd;

    choose_dimensions();
    return 0;

try_bsd:
    trace("--------- now try BSD ------\n");

    /* Discard whatever pass 1 may have left behind and rewind the cursor. */
    reset_global();
    reset_parser();
    reset_sortformat();
    format_flags = 0;
    ps_argc = argc;
    ps_argv = argv;
    thisarg = 0;
    /* no need to reset flagptr */
    force_bsd=1;
    prefer_bsd_defaults=1;
    if(!( (PER_OLD_m|PER_BSD_m) & personality )) /* if default m setting... */
        personality |= PER_OLD_m; /* Prefer old Linux over true BSD. */
    /* Do not set PER_FORCE_BSD! It is tested below. */

    /* Pass 2: same stages, but now any failure is final. */
    err2 = parse_all_options();
    if(err2) goto total_failure;
    err2 = thread_option_check();
    if(err2) goto total_failure;
    err2 = process_sf_options();
    if(err2) goto total_failure;
    err2 = select_bits_setup();
    if(err2) goto total_failure;

    choose_dimensions();
    return 0;

total_failure:
    reset_parser();
    /*
     * With PER_FORCE_BSD pass 1 never ran, so err is still NULL and the
     * pass 2 message is the only one available; otherwise the pass 1
     * message is the one reported.
     */
    if(personality & PER_FORCE_BSD) fprintf(stderr, _("error: %s\n"), err2);
    else fprintf(stderr, _("error: %s\n"), err);
    do_help(NULL, EXIT_FAILURE);
}

先看第一部分,全局变量personality由set_personality设置,大体就是根据不同的操作系统和架构,来设置不同的参数。

int arg_parse(int argc, char *argv[]) {
    const char *err = NULL;
    const char *err2 = NULL;
    ps_argc = argc;
    ps_argv = argv;
    thisarg = 0;

    if(personality & PER_FORCE_BSD) goto try_bsd;

对BSD而言,其personality是包含PER_FORCE_BSD位的,但是对linux则没有。因此如果有这个位,则跳过第一轮(SysV优先)的解析,直接跳到try_bsd按BSD风格解析。

case_bsd:
    personality = PER_FORCE_BSD | PER_BSD_h | PER_BSD_m;
    prefer_bsd_defaults = 1;
    bsd_j_format = "FB_j";
    bsd_l_format = "FB_l";
    /* bsd_s_format not used */
    bsd_u_format = "FB_u";
    bsd_v_format = "FB_v";
    return NULL;

否则继续执行parse_all_options()。

err = parse_all_options();

开始阅读parse_all_options函数,对每个当前的参数,调用arg_type(ps_argv[thisarg])获取其类型。

/* First assume sysv, because that is the POSIX and Unix98 standard. */
/*
 * Walk ps_argv from index thisarg+1 to the end, classify each word with
 * arg_type() and hand it to the matching sub-parser.  The sub-parsers may
 * advance thisarg themselves when they consume extra words.
 *
 * Returns NULL when every word was accepted, otherwise the error string
 * of the first word that was rejected.
 */
static const char *parse_all_options(void) {
    const char *err = NULL;
    int at;
    /* Pre-increment: with thisarg starting at 0 the first word examined is ps_argv[1]. */
    while(++thisarg < ps_argc) {
        trace("parse_all_options calling arg_type for \"%s\"\n", ps_argv[thisarg]);
        at = arg_type(ps_argv[thisarg]);
        trace("ps_argv[thisarg] is %s\n", ps_argv[thisarg]);
        switch(at) {
        case ARG_GNU:
            err = parse_gnu_option();
            break;
        /*
         * The next two cases are deliberately interleaved: the ARG_BSD
         * label lives inside the if-body of ARG_SYSV.
         *  - ARG_SYSV, force_bsd clear: parse as SysV and leave the switch.
         *  - ARG_SYSV, force_bsd set:   the if-body is skipped, so control
         *    continues after the closing brace and the word is parsed as BSD.
         *  - ARG_BSD: the switch jumps to the label inside the if-body, runs
         *    the "way bad" check, then continues after the closing brace
         *    into the BSD parser.
         */
        case ARG_SYSV:
            if(!force_bsd) {  /* else go past case ARG_BSD */
                err = parse_sysv_option();
                break;

                case ARG_BSD:
                    if(force_bsd && !(personality & PER_FORCE_BSD)) return _("way bad");
            }
            prefer_bsd_defaults = 1;
            err = parse_bsd_option();
            break;
        /* Bare IDs (123, -123, +123): hand off to the trailing-ID parser. */
        case ARG_PGRP:
        case ARG_SESS:
        case ARG_PID:
            prefer_bsd_defaults = 1;
            err = parse_trailing_pids();
            break;
        case ARG_END:
        case ARG_FAIL:
            trace("              FAIL/END on [%s]\n",ps_argv[thisarg]);
            return _("garbage option");
            /* not reached: the return above already left the function */
            break;
        default:
            /* arg_type() returned a value this switch does not know about. */
            printf("                  ?    %s\n",ps_argv[thisarg]);
            return _("something broke");
        } /* switch */
        if(err) return err;
    } /* while */
    return NULL;
}

arg_type的定义如下,即:先看第一个字符,如果是字母,则认为是BSD风格的参数;如果是数字,则认为是PID;如果是+,则认为是ARG_SESS类型;如果是其他字符且不是-,认为是非法参数(ARG_FAIL)。第一个字符是-时再看第二个字符:如果是字母,认为是SYSV参数(例如-a);如果是数字,则认为是PGRP;如果是其他字符且不是-,认为非法。如果是--开头的,再看第三个字符:是字母则认为是GNU参数;如果参数恰好只有"--"(第三个字符是\0),则返回ARG_END;其余情况一律返回ARG_FAIL。

/*
 * Classify one command-line word by its leading characters:
 *
 *   letter...     ARG_BSD
 *   digit...      ARG_PID
 *   +...          ARG_SESS
 *   -letter...    ARG_SYSV
 *   -digit...     ARG_PGRP
 *   --letter...   ARG_GNU
 *   --            ARG_END   (nothing after the two dashes)
 *   anything else ARG_FAIL
 *
 * Only the ASCII ranges a-z, A-Z and 0-9 are tested; the locale is not
 * consulted.  A later character is read only after the earlier one was
 * found to be '-', so the terminating NUL is never stepped over.
 */
static int arg_type(const char *str) {
    int c = str[0];

    if(((c>='a') && (c<='z')) || ((c>='A') && (c<='Z'))) return ARG_BSD;
    if((c>='0') && (c<='9')) return ARG_PID;
    switch(c) {
    case '+':
        return ARG_SESS;
    case '-':
        break;              /* one dash so far: look at the next character */
    default:
        return ARG_FAIL;
    }

    c = str[1];
    if(((c>='a') && (c<='z')) || ((c>='A') && (c<='Z'))) return ARG_SYSV;
    if((c>='0') && (c<='9')) return ARG_PGRP;
    if(c != '-') return ARG_FAIL;

    /* two dashes: either a GNU long option or a bare "--" */
    c = str[2];
    if(((c>='a') && (c<='z')) || ((c>='A') && (c<='Z'))) return ARG_GNU;
    return (c == '\0') ? ARG_END : ARG_FAIL;
}

对比ps的man,即可理解:

DESCRIPTION
       ps displays information about a selection of the active processes.  If you want a
       repetitive update of the selection and the displayed information, use top(1) instead.

       This version of ps accepts several kinds of options:

       1   UNIX options, which may be grouped and must be preceded by a dash.
       2   BSD options, which may be grouped and must not be used with a dash.
       3   GNU long options, which are preceded by two dashes.

总之为了保证兼容性,ps的命令行非常混乱。

加号和数字的作用:

   --sort spec
          Specify sorting order.  Sorting syntax is [+|-]key[,[+|-]key[,...]].  Choose a
          multi-letter key from the STANDARD FORMAT SPECIFIERS section.  The "+" is
          optional since default direction is increasing numerical or lexicographic
          order.  Identical to k.  For example: ps jax --sort=uid,-ppid,+pid

PROCESS SELECTION BY LIST
       These options accept a single argument in the form of a blank-separated or
       comma-separated list.  They can be used multiple times.  For example:
       ps -p "1 2" -p 3,4

       -123   Identical to --pid 123.

       123    Identical to --pid 123.

在所有类型中,ARG_END、ARG_FAIL、default会使parse_all_options立即返回一条错误信息,不再继续解析后面的参数。

    case ARG_PGRP:
    case ARG_SESS:
    case ARG_PID:
        prefer_bsd_defaults = 1;
        err = parse_trailing_pids();
        break;

PGRP、SESS、PID会使ps进一步解析后面的pid。

/*************** process trailing PIDs  **********************/
/* Release a selection node that was never linked into selection_list. */
static void discard_selection_node(selection_node *node) {
    free(node->u);
    free(node);
}

/*
 * Consume every remaining command-line word, starting at ps_argv[thisarg],
 * as a trailing ID:
 *
 *   123    process ID        -> SEL_PID  node
 *   -123   process group ID  -> SEL_PGRP node
 *   +123   session ID        -> SEL_SESS node
 *
 * thisarg is moved to the last index, so the caller's argument loop ends
 * after this call.  Each category that received at least one ID is pushed
 * onto the front of selection_list (pid first, then pgrp, then session).
 *
 * Returns NULL on success or the error string from parse_pid().
 *
 * Unlike the previous revision, nodes that are not linked into
 * selection_list -- every node on the error path, and empty categories on
 * the success path -- are freed here instead of being leaked.
 */
static const char *parse_trailing_pids(void) {
    selection_node *pidnode;  /* pid */
    selection_node *grpnode;  /* process group */
    selection_node *sidnode;  /* session */
    char **argp;     /* pointer to pointer to text of PID */
    const char *err = NULL; /* error code that could or did happen */
    int i;

    i = ps_argc - thisarg;  /* how many trailing PIDs, SIDs, PGRPs?? */
    argp = ps_argv + thisarg;
    thisarg = ps_argc - 1;   /* we must be at the end now */

    /* Each array is sized for the worst case: all i words in one category. */
    pidnode = xmalloc(sizeof(selection_node));
    pidnode->u = xmalloc(i*sizeof(sel_union)); /* waste is insignificant */
    pidnode->n = 0;

    grpnode = xmalloc(sizeof(selection_node));
    grpnode->u = xmalloc(i*sizeof(sel_union)); /* waste is insignificant */
    grpnode->n = 0;

    sidnode = xmalloc(sizeof(selection_node));
    sidnode->u = xmalloc(i*sizeof(sel_union)); /* waste is insignificant */
    sidnode->n = 0;

    while(i--) {
        char *data;
        data = *(argp++);
        switch(*data) {
        default:
            err = parse_pid(  data, pidnode->u + pidnode->n++);
            break;
        case '-':   /* skip the sign, the rest is the number */
            err = parse_pid(++data, grpnode->u + grpnode->n++);
            break;
        case '+':
            err = parse_pid(++data, sidnode->u + sidnode->n++);
            break;
        }
        if(err) goto fail;   /* nothing is linked yet: free all three */
    }

    if(pidnode->n) {
        pidnode->next = selection_list;
        selection_list = pidnode;
        selection_list->typecode = SEL_PID;
    } else {
        discard_selection_node(pidnode);
    }

    if(grpnode->n) {
        grpnode->next = selection_list;
        selection_list = grpnode;
        selection_list->typecode = SEL_PGRP;
    } else {
        discard_selection_node(grpnode);
    }

    if(sidnode->n) {
        sidnode->next = selection_list;
        selection_list = sidnode;
        selection_list->typecode = SEL_SESS;
    } else {
        discard_selection_node(sidnode);
    }

    return NULL;

fail:
    discard_selection_node(pidnode);
    discard_selection_node(grpnode);
    discard_selection_node(sidnode);
    return err;
}

解析时要求它是一个1~0x7fffffff的正整数。并按+、-、默认的情况分别放置在sidnode/grpnode/pidnode中。

/*
 * Convert one ID string and store it in ret->pid.
 *
 * The text is read with strtoul() in base 0 and must be consumed up to
 * its terminating NUL.  The value must lie in 1 .. 0x7fffffff.
 *
 * Returns 0 (no error) on success, otherwise a translated error string;
 * ret is written only on success.
 */
static const char *parse_pid(char *str, sel_union *ret) {
    char *tail;
    unsigned long value = strtoul(str, &tail, 0);

    /* anything left over after the number is a syntax error */
    if(*tail != '\0') return _("process ID list syntax error");
    if((value < 1) || (value > 0x7fffffffUL)) return _("process ID out of range");

    ret->pid = value;
    return 0;
}

对ARG_GNU而言,处理函数是parse_gnu_option

parser.c:parse_all_options

    case ARG_GNU:
        err = parse_gnu_option();
        break;

parse_gnu_option的开头列出了一组支持的参数。

static const gnu_table_struct gnu_table[] = {
    {"Group",         &&case_Group},       /* rgid */
    {"User",          &&case_User},        /* ruid */
    {"cols",          &&case_cols},
    {"columns",       &&case_columns},
    {"context",       &&case_context},
    {"cumulative",    &&case_cumulative},
    {"deselect",      &&case_deselect},    /* -N */
    {"forest",        &&case_forest},      /* f -H */

这里的case_Group之类的不是什么全局变量,而是本地标签,第一次看到能这么用,很神奇……

        {"version",       &&case_version},
        {"width",         &&case_width},
    };
    const int gnu_table_count = sizeof(gnu_table)/sizeof(gnu_table_struct);

    s = ps_argv[thisarg]+2;
    sl = strcspn(s,":=");
    if(sl > 15) return _("unknown gnu long option");
    strncpy(buf, s, sl);
    buf[sl] = '\0';
    flagptr = s+sl;

    found = bsearch(&findme, gnu_table, gnu_table_count,
                    sizeof(gnu_table_struct), compare_gnu_table_structs
                   );

    if(!found) {
        if (!strcmp(buf, the_word_help))
            goto case_help;
        return _("unknown gnu long option");
    }

    goto *(found->jump);    /* See gcc extension info.  :-)   */

case_Group:
    trace("--Group\n");
    arg = grab_gnu_arg();
    if(!arg) return _("list of real groups must follow --Group");
    err=parse_list(arg, parse_gid);
    if(err) return err;
    selection_list->typecode = SEL_RGID;
    return NULL;
case_User:
    trace("--User\n");
    arg = grab_gnu_arg();
    if(!arg) return _("list of real users must follow --User");
    err=parse_list(arg, parse_uid);
    if(err) return err;
    selection_list->typecode = SEL_RUID;
    return NULL;

先逐行读一下代码。s是argv[i] + 2,这是因为ARG_GNU是“--”开头的,跳过前两个字符。sl是:=前的字符数。然后将:=前的内容拷贝到buf中。buf定义为buf[16]所以限制长度不能大于15。在这之后,flagptr就是:=开始的字符。

然后,通过bsearch库函数在gnu_table中搜索findme={buf, NULL}。如果找到就直接跳到对应标签上,这语法也是很离谱。

先看看man手册中对这些参数的定义:

   --cols n
          Set screen width.

   --columns n
          Set screen width.

   --cumulative
          Include some dead child process data (as a sum with the parent).

有但不是全有,比如--Group就不在主词条里面(但在其他词条的描述里有提到)。挑几个比较有特点的读一下好了。

首先是它们的一个通用工具函数grab_gnu_arg,它在所有需要额外参数的,比如--cols n中被使用。

/*
 * Fetch the argument of the GNU long option currently being parsed.
 *
 * flagptr points just past the option name inside ps_argv[thisarg]:
 *   "=value" or ":value"  -> flagptr is advanced past the separator and
 *                            returned; an empty value yields NULL
 *   end of word           -> the next word is taken as the argument and
 *                            thisarg is advanced to it; NULL if there is
 *                            no next word or it is the empty string
 *   anything else         -> NULL
 *
 * Return the argument or NULL
 */
static const char *grab_gnu_arg(void) {
    const char sep = *flagptr;

    if((sep == '=') || (sep == ':')) {
        /* argument is part of ps_argv[thisarg] */
        ++flagptr;
        return *flagptr ? flagptr : NULL;
    }
    if(sep != '\0') return NULL;           /* something bad */

    /* argument, if any, is the word following ps_argv[thisarg] */
    if(thisarg+1 >= ps_argc) return NULL;  /* there is nothing left */
    if(ps_argv[thisarg+1][0] == '\0') return NULL;
    thisarg++;
    return ps_argv[thisarg];
}
//
//<---->
//
case_cols:
case_width:
case_columns:
    trace("--cols\n");
    arg = grab_gnu_arg();
    if(arg && *arg) {
        long t;
        char *endptr;
        t = strtol(arg, &endptr, 0);
        if(!*endptr && (t>0) && (t<2000000000)) {
            screen_cols = (int)t;
            return NULL;
        }
    }
    return _("number of columns must follow --cols, --width, or --columns");

如果指定的是例如--cols=2、--cols:2,则返回=或:之后的部分(若其后为空则返回NULL)。如果当前参数已经到头(\0),则看看下一个参数是否存在且非空(不是""),如果是,则将thisarg加1并返回该参数。

以--cols为例,这里设置screen_cols为一个大于0且小于2000000000的整数(用0x7fffffff不好吗……)。

再挑一个典型。

case_Group:
    trace("--Group\n");
    arg = grab_gnu_arg();
    if(!arg) return _("list of real groups must follow --Group");
    err=parse_list(arg, parse_gid);
    if(err) return err;
    selection_list->typecode = SEL_RGID;
    return NULL;

--Group这样后面跟一个list的,还需要parse_list来处理列表。

/*
 * Used to parse lists in a generic way. (function pointers)
 *
 * arg is a list of items separated by exactly one ' ', ',' or '\t'.
 * Empty lists, leading/trailing separators and consecutive separators
 * are rejected with "improper list".
 *
 * Each item is handed to parse_fn together with the slot of node->u it
 * should fill.  Slots are filled from the highest index downwards, so
 * the stored order is the reverse of the order in arg.
 *
 * On success the new node is pushed onto the front of selection_list and
 * NULL is returned; the caller is expected to set its typecode.  On
 * failure everything allocated here is freed and an error string is
 * returned (either "improper list" or whatever parse_fn returned).
 *
 * Changes from the previous revision:
 *  - the working copy is made with xmalloc()+memcpy() instead of an
 *    unchecked strdup(), whose NULL result would have been dereferenced
 *    (xmalloc is assumed to deal with allocation failure itself, as the
 *    other allocations in this function already rely on);
 *  - the loop no longer computes sep_loc + 1 when sep_loc is NULL after
 *    the last item.
 */
static const char *parse_list(const char *arg, const char *(*parse_fn)(char *, sel_union *) ) {
    selection_node *node;
    char *buf;                      /* temp copy of arg to hack on */
    char *sep_loc;                  /* separator location: " \t," */
    char *walk;
    size_t arglen;
    int items;
    int need_item;
    const char *err;       /* error code that could or did happen */
    /*** prepare to operate ***/
    arglen = strlen(arg);
    node = xmalloc(sizeof(selection_node));
    /* one slot per input character is an upper bound on the item count */
    node->u = xmalloc(arglen*sizeof(sel_union)); /* waste is insignificant */
    node->n = 0;
    buf = xmalloc(arglen+1);
    memcpy(buf, arg, arglen+1);
    /*** sanity check and count items ***/
    need_item = 1; /* true */
    items = 0;
    walk = buf;
    err = _("improper list");
    do {
        switch(*walk) {
        case ' ':
        case ',':
        case '\t':
        case '\0':          /* only reachable when arg is the empty string */
            if(need_item) goto parse_error;
            need_item=1;
            break;
        default:
            if(need_item) items++;
            need_item=0;
        }
    } while (*++walk);
    if(need_item) goto parse_error;   /* list ended with a separator */
    node->n = items;
    /*** actually parse the list ***/
    walk = buf;
    while(items--) {
        sep_loc = strpbrk(walk," ,\t");
        if(sep_loc) *sep_loc = '\0';
        if(( err=(parse_fn)(walk, node->u+items) )) goto parse_error;
        if(!sep_loc) break;           /* that was the last item */
        walk = sep_loc + 1; /* point to next item */
    }
    free(buf);
    node->next = selection_list;
    selection_list = node;
    return NULL;
parse_error:
    free(buf);
    free(node->u);
    free(node);
    return err;
}

逐字查找,如果是逗号或者空白,则标记need_item,它们后面必须跟其他字符,统计一共有多少段。
然后设置node->n为计算到的总数。再从头开始扫描空白或逗号,并对扫出来的每一段调用parse_fn。parse_fn是传入的参数之一,看一下典型的parse_fn,这个是"C"参数传入的parse_fn,它会把字符串拷贝到ret->cmd(即(node->u + items)->cmd)中,最多保留sizeof ret->cmd - 1个字符,并保证以\0结尾。

// Store a command name in ret->cmd.  At most sizeof(ret->cmd)-1 characters
// of str are kept; the rest of the buffer, including its last byte, is
// zero.  Always returns 0 (no error).
static const char *parse_cmd(char *str, sel_union *ret) {
    const size_t last = sizeof(ret->cmd) - 1;

    strncpy(ret->cmd, str, last);  // strncpy zero-fills whatever it does not copy
    ret->cmd[last] = '\0';         // final byte is always the terminator
    return 0;
}

关于sel_union->cmd,在common.h中有定义。sizeof ret->cmd必然也就是64了。在parse_list的开头注意有:“node->u = xmalloc(strlen(arg)*sizeof(sel_union));”,strlen(arg)是整个列表字符串的长度,因此这里会按“列表字符串长度×64字节”给node->u分配数组(列表项的个数不会超过字符串长度,所以空间足够,只是有些浪费)。

/*
 * One selection value.  Which member is meaningful depends on the kind
 * of list the owning selection_node holds.
 */
typedef union sel_union {
    pid_t pid;
    pid_t ppid;
    uid_t uid;
    gid_t gid;
    dev_t tty;
    /*
     * NOTE(review): the comment below says the field is not terminated,
     * yet parse_cmd() always writes '\0' into the last byte -- confirm
     * whether other writers or readers rely on unterminated content.
     */
    char  cmd[64];  /* this is _not_ \0 terminated */
} sel_union;

/*
 * One entry of the singly linked selection_list.  The parsers push new
 * nodes onto the front of the list.
 */
typedef struct selection_node {
    struct selection_node *next;  /* next (older) entry in selection_list */
    sel_union *u;  /* used if selection type has a list of values */
    int n;         /* used if selection type has a list of values */
    int typecode;  /* a SEL_* constant, assigned after the node is linked */
} selection_node;

这篇已经足够长了,而且已经看完了parse_gnu_option部分。下一篇看看剩余的两个 sysv_option 和 bsd_option。

    case ARG_SYSV:
        if(!force_bsd) {  /* else go past case ARG_BSD */
            err = parse_sysv_option();
            break;

            case ARG_BSD:
                if(force_bsd && !(personality & PER_FORCE_BSD)) return _("way bad");
        }
        prefer_bsd_defaults = 1;
        err = parse_bsd_option();
        break;

标签:none

添加新评论

captcha
请输入验证码