psproc源码阅读 - 2
main中接下来的函数都比较重要,所以这里就分段来介绍了。
arg_parse(argc,argv);
/* check for invalid combination of arguments */
arg_check_conflicts();
首先是arg_parse。
/*
 * Parse the whole ps command line.
 *
 * Two passes are possible.  The first pass runs parse_all_options(),
 * thread_option_check(), process_sf_options() and select_bits_setup() in
 * that order with the default settings.  If any of them returns an error
 * message, or if the personality already carries PER_FORCE_BSD, control
 * goes to the try_bsd pass, which resets the parser state, sets force_bsd
 * and runs the same four steps again.
 *
 * Returns 0 as soon as one pass completes and choose_dimensions() has run.
 * If the second pass fails too, an error is printed to stderr and
 * do_help(NULL, EXIT_FAILURE) is called.
 */
int arg_parse(int argc, char *argv[]) {
/* err holds the first-pass error message, err2 the second-pass one. */
const char *err = NULL;
const char *err2 = NULL;
/* Publish the argument vector to the parser globals; thisarg = 0 means */
/* parse_all_options() starts at argv[1] (it pre-increments thisarg).   */
ps_argc = argc;
ps_argv = argv;
thisarg = 0;
/* A forced-BSD personality skips the first pass, so err stays NULL. */
if(personality & PER_FORCE_BSD) goto try_bsd;
/* First pass: stop at the first step that reports an error. */
err = parse_all_options();
if(err) goto try_bsd;
err = thread_option_check();
if(err) goto try_bsd;
err = process_sf_options();
if(err) goto try_bsd;
err = select_bits_setup();
if(err) goto try_bsd;
choose_dimensions();
return 0;
try_bsd:
trace("--------- now try BSD ------\n");
/* Discard whatever the first pass left behind before parsing again. */
reset_global();
reset_parser();
reset_sortformat();
format_flags = 0;
ps_argc = argc;
ps_argv = argv;
thisarg = 0;
/* no need to reset flagptr */
force_bsd=1;
prefer_bsd_defaults=1;
if(!( (PER_OLD_m|PER_BSD_m) & personality )) /* if default m setting... */
personality |= PER_OLD_m; /* Prefer old Linux over true BSD. */
/* Do not set PER_FORCE_BSD! It is tested below. */
/* Second pass: same four steps, now with force_bsd set. */
err2 = parse_all_options();
if(err2) goto total_failure;
err2 = thread_option_check();
if(err2) goto total_failure;
err2 = process_sf_options();
if(err2) goto total_failure;
err2 = select_bits_setup();
if(err2) goto total_failure;
choose_dimensions();
return 0;
total_failure:
reset_parser();
/* With PER_FORCE_BSD only the second pass ran, so report its message; */
/* otherwise report the message from the first pass.                   */
if(personality & PER_FORCE_BSD) fprintf(stderr, _("error: %s\n"), err2);
else fprintf(stderr, _("error: %s\n"), err);
/* NOTE(review): no return follows, so do_help() presumably does not */
/* return when given EXIT_FAILURE -- confirm against its definition.  */
do_help(NULL, EXIT_FAILURE);
}
先看第一部分,全局变量personality由set_personality设置,大体就是根据不同的操作系统和架构,来设置不同的参数。
int arg_parse(int argc, char *argv[]) {
const char *err = NULL;
const char *err2 = NULL;
ps_argc = argc;
ps_argv = argv;
thisarg = 0;
if(personality & PER_FORCE_BSD) goto try_bsd;
对BSD而言,其personality是包含PER_FORCE_BSD位的,但是对linux则没有。因此如果有这个位,则优先尝试bsd。
case_bsd:
personality = PER_FORCE_BSD | PER_BSD_h | PER_BSD_m;
prefer_bsd_defaults = 1;
bsd_j_format = "FB_j";
bsd_l_format = "FB_l";
/* bsd_s_format not used */
bsd_u_format = "FB_u";
bsd_v_format = "FB_v";
return NULL;
否则继续执行parse_all_options()。
err = parse_all_options();
开始阅读parse_all_options函数,对每个当前的参数,调用arg_type(ps_argv[thisarg])获取其类型。
/*
 * Walk the remaining command-line words and hand each one to the parser
 * for its syntax family, as classified by arg_type().  SysV is assumed
 * first, because that is the POSIX and Unix98 standard.
 *
 * Returns NULL once every word has been accepted, otherwise the error
 * message belonging to the first word that was rejected.
 */
static const char *parse_all_options(void) {
  for(;;) {
    const char *msg = NULL;
    int kind;

    if(++thisarg >= ps_argc) return NULL;   /* ran out of words */

    trace("parse_all_options calling arg_type for \"%s\"\n", ps_argv[thisarg]);
    kind = arg_type(ps_argv[thisarg]);
    trace("ps_argv[thisarg] is %s\n", ps_argv[thisarg]);

    switch(kind) {
    case ARG_GNU:
      msg = parse_gnu_option();
      break;

    case ARG_SYSV:
      if(!force_bsd) {
        msg = parse_sysv_option();
        break;
      }
      /* BSD parsing is forced: the dashed word is parsed as a BSD option.
       * The "way bad" check below applies to ARG_BSD words only. */
      prefer_bsd_defaults = 1;
      msg = parse_bsd_option();
      break;

    case ARG_BSD:
      if(force_bsd && !(personality & PER_FORCE_BSD)) return _("way bad");
      prefer_bsd_defaults = 1;
      msg = parse_bsd_option();
      break;

    case ARG_PGRP:
    case ARG_SESS:
    case ARG_PID:
      prefer_bsd_defaults = 1;
      msg = parse_trailing_pids();
      break;

    case ARG_END:
    case ARG_FAIL:
      trace(" FAIL/END on [%s]\n",ps_argv[thisarg]);
      return _("garbage option");

    default:
      printf(" ? %s\n",ps_argv[thisarg]);
      return _("something broke");
    }

    if(msg) return msg;
  }
}
arg_type的定义如下。先看第一个字符:字母开头认为是BSD风格的参数,数字开头认为是PID,+开头认为是ARG_SESS类型,其他情况只要不是-开头就认为非法(ARG_FAIL)。如果是-开头,再看第二个字符:字母认为是SYSV参数(例如-a),数字认为是PGRP,其他字符只要不是-也认为非法。如果是--开头,再看第三个字符:字母认为是GNU参数;如果字符串到此结束(即参数只有“--”),则是ARG_END;其余情况都是ARG_FAIL。
/*
 * Classify one command-line word by its leading characters.
 *
 *   letter...        ARG_BSD
 *   digit...         ARG_PID
 *   +...             ARG_SESS
 *   -letter...       ARG_SYSV
 *   -digit...        ARG_PGRP
 *   --letter...      ARG_GNU
 *   --               ARG_END
 *   anything else    ARG_FAIL
 *
 * Only ASCII letters and digits are recognised; the comparisons are done
 * on character ranges, not through <ctype.h>.
 */
static int arg_type(const char *str) {
  int ch = str[0];

  if(ch != '-') {
    if(((ch>='a') && (ch<='z')) || ((ch>='A') && (ch<='Z'))) return ARG_BSD;
    if((ch>='0') && (ch<='9')) return ARG_PID;
    return (ch=='+') ? ARG_SESS : ARG_FAIL;
  }

  /* one leading dash */
  ch = str[1];
  if(ch != '-') {
    if(((ch>='a') && (ch<='z')) || ((ch>='A') && (ch<='Z'))) return ARG_SYSV;
    if((ch>='0') && (ch<='9')) return ARG_PGRP;
    return ARG_FAIL;
  }

  /* two leading dashes */
  ch = str[2];
  if(((ch>='a') && (ch<='z')) || ((ch>='A') && (ch<='Z'))) return ARG_GNU;
  return (ch=='\0') ? ARG_END : ARG_FAIL;
}
对比ps的man,即可理解:
DESCRIPTION
ps displays information about a selection of the active processes. If you want a
repetitive update of the selection and the displayed information, use top(1) instead.
This version of ps accepts several kinds of options:
1 UNIX options, which may be grouped and must be preceded by a dash.
2 BSD options, which may be grouped and must not be used with a dash.
3 GNU long options, which are preceded by two dashes.
总之为了保证兼容性,ps的命令行非常混乱。
加号和数字的作用(注意:下面--sort里的“+”只表示排序方向;而arg_type中以+开头的独立参数会被归为ARG_SESS,之后由parse_trailing_pids当作会话ID处理):
--sort spec
Specify sorting order. Sorting syntax is [+|-]key[,[+|-]key[,...]]. Choose a
multi-letter key from the STANDARD FORMAT SPECIFIERS section. The "+" is
optional since default direction is increasing numerical or lexicographic
order. Identical to k. For example: ps jax --sort=uid,-ppid,+pid
PROCESS SELECTION BY LIST
These options accept a single argument in the form of a blank-separated or
comma-separated list. They can be used multiple times. For example:
ps -p "1 2" -p 3,4
-123 Identical to --pid 123.
123 Identical to --pid 123.
在所有类型中,ARG_END、ARG_FAIL、default不会调用任何解析函数,而是让parse_all_options直接返回一条错误信息。
case ARG_PGRP:
case ARG_SESS:
case ARG_PID:
prefer_bsd_defaults = 1;
err = parse_trailing_pids();
break;
PGRP、SESS、PID会使ps进一步解析后面的pid。
/*************** process trailing PIDs **********************/
/*
 * Treat every remaining command-line word, starting at ps_argv[thisarg],
 * as a trailing ID and sort it into one of three lists by its first
 * character:
 *
 *   -N   process group  (SEL_PGRP)
 *   +N   session        (SEL_SESS)
 *   N    process ID     (SEL_PID)
 *
 * thisarg is moved to the last word, so the caller's loop ends afterwards.
 *
 * On success each non-empty list is pushed on the front of selection_list
 * (PID first, then PGRP, then SESS) and NULL is returned.  If parse_pid()
 * rejects a word, its error message is returned and nothing is pushed.
 *
 * Nodes that are not handed over to selection_list are freed here: they
 * were never linked anywhere, so nobody else could release them.
 */
static const char *parse_trailing_pids(void) {
  selection_node *pidnode;  /* pid */
  selection_node *grpnode;  /* process group */
  selection_node *sidnode;  /* session */
  char **argp;              /* pointer to pointer to text of PID */
  const char *err = NULL;   /* error code that could or did happen */
  int i;

  i = ps_argc - thisarg;  /* how many trailing PIDs, SIDs, PGRPs?? */
  argp = ps_argv + thisarg;
  thisarg = ps_argc - 1;   /* we must be at the end now */

  /* Each array is sized for the worst case of all words landing in it. */
  pidnode = xmalloc(sizeof(selection_node));
  pidnode->u = xmalloc(i*sizeof(sel_union)); /* waste is insignificant */
  pidnode->n = 0;

  grpnode = xmalloc(sizeof(selection_node));
  grpnode->u = xmalloc(i*sizeof(sel_union)); /* waste is insignificant */
  grpnode->n = 0;

  sidnode = xmalloc(sizeof(selection_node));
  sidnode->u = xmalloc(i*sizeof(sel_union)); /* waste is insignificant */
  sidnode->n = 0;

  while(i--) {
    char *data;
    data = *(argp++);
    switch(*data) {
    default:
      err = parse_pid(  data, pidnode->u + pidnode->n++);
      break;
    case '-':
      err = parse_pid(++data, grpnode->u + grpnode->n++);
      break;
    case '+':
      err = parse_pid(++data, sidnode->u + sidnode->n++);
      break;
    }
    if(err) break;  /* nothing has been linked yet; released below */
  }

  if(!err && pidnode->n) {
    pidnode->next = selection_list;
    selection_list = pidnode;
    selection_list->typecode = SEL_PID;
  } else {
    free(pidnode->u);
    free(pidnode);
  }

  if(!err && grpnode->n) {
    grpnode->next = selection_list;
    selection_list = grpnode;
    selection_list->typecode = SEL_PGRP;
  } else {
    free(grpnode->u);
    free(grpnode);
  }

  if(!err && sidnode->n) {
    sidnode->next = selection_list;
    selection_list = sidnode;
    selection_list->typecode = SEL_SESS;
  } else {
    free(sidnode->u);
    free(sidnode);
  }

  return err;
}
解析时要求它是一个1~0x7fffffff的正整数,并按+开头、-开头、默认三种情况分别放入sidnode、grpnode、pidnode中。
/*
 * Convert one textual ID and store it in ret->pid.
 *
 * The text is read by strtoul() with base 0, so decimal, 0-prefixed octal
 * and 0x-prefixed hexadecimal are all accepted.  The whole string must be
 * consumed and the value must lie in 1..0x7fffffff.
 *
 * Returns NULL on success, otherwise an error message; ret is left
 * untouched on failure.
 */
static const char *parse_pid(char *str, sel_union *ret) {
  char *tail;
  unsigned long value = strtoul(str, &tail, 0);

  if(*tail != '\0') return _("process ID list syntax error");
  if(value < 1 || value > 0x7fffffffUL) return _("process ID out of range");

  ret->pid = value;
  return NULL;
}
对ARG_GNU而言,处理函数是parse_gnu_option
parser.c:parse_all_options
case ARG_GNU:
err = parse_gnu_option();
break;
parse_gnu_option的开头列出了一组支持的参数。
static const gnu_table_struct gnu_table[] = {
{"Group", &&case_Group}, /* rgid */
{"User", &&case_User}, /* ruid */
{"cols", &&case_cols},
{"columns", &&case_columns},
{"context", &&case_context},
{"cumulative", &&case_cumulative},
{"deselect", &&case_deselect}, /* -N */
{"forest", &&case_forest}, /* f -H */
这里的case_Group之类的不是什么全局变量,而是函数内的本地标签:&&label是GCC的扩展语法,用来取标签的地址,之后可以用goto *ptr跳转过去。第一次看到能这么用,很神奇……
{"version", &&case_version},
{"width", &&case_width},
};
const int gnu_table_count = sizeof(gnu_table)/sizeof(gnu_table_struct);
s = ps_argv[thisarg]+2;
sl = strcspn(s,":=");
if(sl > 15) return _("unknown gnu long option");
strncpy(buf, s, sl);
buf[sl] = '\0';
flagptr = s+sl;
found = bsearch(&findme, gnu_table, gnu_table_count,
sizeof(gnu_table_struct), compare_gnu_table_structs
);
if(!found) {
if (!strcmp(buf, the_word_help))
goto case_help;
return _("unknown gnu long option");
}
goto *(found->jump); /* See gcc extension info. :-) */
case_Group:
trace("--Group\n");
arg = grab_gnu_arg();
if(!arg) return _("list of real groups must follow --Group");
err=parse_list(arg, parse_gid);
if(err) return err;
selection_list->typecode = SEL_RGID;
return NULL;
case_User:
trace("--User\n");
arg = grab_gnu_arg();
if(!arg) return _("list of real users must follow --User");
err=parse_list(arg, parse_uid);
if(err) return err;
selection_list->typecode = SEL_RUID;
return NULL;
先逐行读一下代码。s是argv[i] + 2,这是因为ARG_GNU是“--”开头的,跳过前两个字符。sl是第一个“:”或“=”之前的字符数。然后将这之前的内容拷贝到buf中。buf定义为buf[16],所以限制长度不能大于15。在这之后,flagptr指向“:”或“=”所在的位置(如果两者都没有,则指向字符串结尾的\0)。
然后,通过bsearch库函数在gnu_table中搜索findme={buf, NULL}。如果找到就直接跳到对应标签上,这语法也是很离谱。
先看看man手册中对这些参数的定义:
--cols n
Set screen width.
--columns n
Set screen width.
--cumulative
Include some dead child process data (as a sum with the parent).
有但不是全有,比如--Group就不在主词条里面(但在其他词条的描述里有提到)。挑几个比较有特点的读一下好了。
首先是它们共用的一个工具函数grab_gnu_arg,所有需要额外参数的选项(比如--cols n)都会用到它。
/*
 * Fetch the argument of the GNU long option currently being parsed.
 *
 * flagptr points just past the option name inside ps_argv[thisarg]:
 *   - at '=' or ':'  the argument is the text after it; NULL if empty
 *   - at '\0'        the argument is the next word, which is consumed
 *                    (thisarg is advanced); NULL if there is no next
 *                    word or it is an empty string
 *   - anything else  NULL
 *
 * Return the argument or NULL
 */
static const char *grab_gnu_arg(void) {
  const char lead = *flagptr;

  if(lead == '=' || lead == ':') {   /* argument is part of ps_argv[thisarg] */
    ++flagptr;
    return *flagptr ? flagptr : NULL;   /* NULL for an empty '=' or ':' */
  }
  if(lead != '\0') return NULL;   /* something bad */

  /* argument follows ps_argv[thisarg] */
  if(thisarg+2 > ps_argc) return NULL;   /* there is nothing left */
  if(*(ps_argv[thisarg+1]) == '\0') return NULL;
  thisarg++;
  return ps_argv[thisarg];
}
//
//<---->
//
case_cols:
case_width:
case_columns:
trace("--cols\n");
arg = grab_gnu_arg();
if(arg && *arg) {
long t;
char *endptr;
t = strtol(arg, &endptr, 0);
if(!*endptr && (t>0) && (t<2000000000)) {
screen_cols = (int)t;
return NULL;
}
}
return _("number of columns must follow --cols, --width, or --columns");
如果指定的是例如--cols=2或--cols:2,则返回=或:之后的部分。如果当前参数已经到头(\0),则看下一个参数是否存在且非空(第一个字符不是\0):如果是,就把thisarg加1并返回这个参数;否则返回NULL。
以--cols为例,这里要求参数是一个大于0且小于2000000000的整数,并把它赋给screen_cols(用0x7fffffff不好吗……)。
再挑一个典型。
case_Group:
trace("--Group\n");
arg = grab_gnu_arg();
if(!arg) return _("list of real groups must follow --Group");
err=parse_list(arg, parse_gid);
if(err) return err;
selection_list->typecode = SEL_RGID;
return NULL;
--Group这样后面跟一个list的,还需要parse_list来处理列表。
/*
 * Used to parse lists in a generic way. (function pointers)
 *
 *   arg       list text; items are separated by exactly one ' ', ',' or
 *             '\t', and the list may neither start nor end with one
 *   parse_fn  converts one item into a sel_union; returns NULL on success
 *             or an error message
 *
 * On success a new selection_node holding the items is pushed on the front
 * of selection_list and NULL is returned; setting its typecode is left to
 * the caller.  Items are stored back to front: the first item of the text
 * ends up in the last array slot.
 *
 * On failure nothing is pushed, everything allocated here is freed, and
 * the message is returned: "improper list" for a malformed list, otherwise
 * whatever parse_fn reported.
 */
static const char *parse_list(const char *arg, const char *(*parse_fn)(char *, sel_union *) ) {
  selection_node *node;
  char *buf;                      /* temp copy of arg to hack on */
  char *sep_loc;                  /* separator location: " \t," */
  char *walk;
  size_t arglen;
  int items;
  int need_item;
  const char *err;       /* error code that could or did happen */

  /*** prepare to operate ***/
  arglen = strlen(arg);
  node = xmalloc(sizeof(selection_node));
  node->u = xmalloc(arglen*sizeof(sel_union)); /* waste is insignificant */
  node->n = 0;
  /* Copy through xmalloc rather than strdup: the strdup result used to be
   * dereferenced without a NULL check.  xmalloc is used unchecked all over
   * this file, so it is presumed to deal with allocation failure itself. */
  buf = xmalloc(arglen+1);
  memcpy(buf, arg, arglen+1);

  /*** sanity check and count items ***/
  need_item = 1; /* true */
  items = 0;
  walk = buf;
  err = _("improper list");
  do {
    switch(*walk) {
    case ' ':
    case ',':
    case '\t':
    case '\0':
      /* a separator (or the end of an empty list) where an item is due */
      if(need_item) goto parse_error;
      need_item=1;
      break;
    default:
      if(need_item) items++;
      need_item=0;
    }
  } while (*++walk);
  if(need_item) goto parse_error;   /* trailing separator */
  node->n = items;

  /*** actually parse the list ***/
  walk = buf;
  while(items--) {
    sep_loc = strpbrk(walk," ,\t");
    if(sep_loc) *sep_loc = '\0';
    if(( err=(parse_fn)(walk, node->u+items) )) goto parse_error;
    /* No separator means this was the last item; stop here instead of
     * forming a pointer from NULL + 1. */
    if(!sep_loc) break;
    walk = sep_loc + 1; /* point to next item */
  }
  free(buf);
  node->next = selection_list;
  selection_list = node;
  return NULL;

parse_error:
  free(buf);
  free(node->u);
  free(node);
  return err;
}
逐字查找,如果是逗号或者空白,则标记need_item,它们后面必须跟其他字符,统计一共有多少段。
然后设置node->n为计算到的总数。再从头开始扫描空白或逗号,并对切出来的每一段调用parse_fn。parse_fn是传入的参数之一,看一下典型的parse_fn,这个是"C"参数传入的parse_fn,它会向ret->cmd(即(node->u + items)->cmd)内拷贝最多sizeof ret->cmd个字节的字符串。
/*
 * Store a command name in ret->cmd.
 *
 * At most sizeof ret->cmd bytes are copied; strncpy fills whatever is left
 * of the buffer with '\0', and the last byte is forced to '\0' so that an
 * over-long name is truncated rather than left unterminated.
 *
 * Always succeeds and returns NULL.
 */
static const char *parse_cmd(char *str, sel_union *ret) {
  const size_t room = sizeof ret->cmd;

  strncpy(ret->cmd, str, room);   // strncpy pads to end
  ret->cmd[room - 1] = '\0';      // but let's be safe
  return NULL;
}
关于sel_union->cmd,在common.h中有定义,sizeof ret->cmd必然也就是64了。注意parse_list的开头有:“node->u = xmalloc(strlen(arg)*sizeof(sel_union));”,strlen(arg)是列表字符串的长度,而sizeof(sel_union)至少是64字节,因此这里会分配“字符串长度×sizeof(sel_union)”字节的数组给node->u,肯定够放下所有的项。
/*
 * One value of a selection list.  A selection_node owns an array of these.
 * Only one member is meaningful at a time; presumably the owning node's
 * typecode tells which -- confirm against the code that reads the list.
 */
typedef union sel_union {
pid_t pid;    /* written by parse_pid() */
pid_t ppid;
uid_t uid;
gid_t gid;
dev_t tty;
/* NOTE(review): parse_cmd() does force a '\0' into the last byte, so */
/* the remark below may describe other writers -- confirm.            */
char cmd[64]; /* this is _not_ \0 terminated */
} sel_union;
/*
 * One entry of the singly linked selection_list.  The parsers build a node
 * and push it on the front of the list through its next pointer.
 */
typedef struct selection_node {
struct selection_node *next; /* following entry of selection_list */
sel_union *u; /* used if selection type has a list of values */
int n; /* used if selection type has a list of values */
int typecode; /* one of the SEL_* codes, assigned once the node is on the list */
} selection_node;
这篇已经足够长了,而且已经看完了parse_gnu_option部分。下一篇看看剩余的两个 sysv_option 和 bsd_option。
case ARG_SYSV:
if(!force_bsd) { /* else go past case ARG_BSD */
err = parse_sysv_option();
break;
case ARG_BSD:
if(force_bsd && !(personality & PER_FORCE_BSD)) return _("way bad");
}
prefer_bsd_defaults = 1;
err = parse_bsd_option();
break;