php内核学习(2) 开始的时候,找函数下断点找得够呛,上网查资料才想起来,php内核的操作符都在zend的zend_operators.c里面,应该去这里面找==操作符对应的函数下断点,然后进行调试。本次调试使用的是cgi模式
test.php
1 2 <?php var_dump("0e12331" =="0e4543" );
cgi_main.c main() 1 2 3 4 5 6 7 8 9 10 11 12 13 if ((query_string = getenv("QUERY_STRING" )) != NULL && strchr (query_string, '=' ) == NULL ) { unsigned char *p; decoded_query_string = strdup(query_string); php_url_decode(decoded_query_string, strlen (decoded_query_string)); for (p = (unsigned char *)decoded_query_string; *p && *p <= ' ' ; p++) { } if (*p == '-' ) { skip_getopt = 1 ; } free (decoded_query_string); }
当查询字符串(QUERY_STRING)中不包含=时,URL中经过编码的查询字符串会在此处解码;解码后跳过开头的空白和控制字符,如果第一个字符是-,就设置skip_getopt = 1
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 if (SG(request_info).path_translated || cgi || fastcgi) { zend_stream_init_filename(&file_handle, SG(request_info).path_translated); } else { zend_stream_init_fp(&file_handle, stdin , "Standard input code" ); } if (php_request_startup() == FAILURE) { if (fastcgi) { fcgi_finish_request(request, 1 ); } SG(server_context) = NULL ; php_module_shutdown(); return FAILURE; }
将服务器传输过来的请求信息存入file_handle结构体(有path_translated或处于cgi/fastcgi模式时按文件名初始化,否则从标准输入读取),然后调用php_request_startup()启动请求
main.c php_execute_script() 这个函数以及后面的查找函数、分析数据的过程,就和之前第一版的一样了。这里主要是研究php弱类型,所以这次我们进入zend_compile_file()函数的编译阶段
phar.c phar_compile_file() 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 if (strstr (file_handle->filename, ".phar" ) && !strstr (file_handle->filename, "://" )) { if (SUCCESS == phar_open_from_filename((char *)file_handle->filename, strlen (file_handle->filename), NULL , 0 , 0 , &phar, NULL )) { if (phar->is_zip || phar->is_tar) { zend_file_handle f = *file_handle; spprintf(&name, 4096 , "phar://%s/%s" , file_handle->filename, ".phar/stub.php" ); if (SUCCESS == phar_orig_zend_open((const char *)name, &f)) { efree(name); name = NULL ; f.filename = file_handle->filename; if (f.opened_path) { efree(f.opened_path); } f.opened_path = file_handle->opened_path; f.free_filename = file_handle->free_filename; switch (file_handle->type) { case ZEND_HANDLE_STREAM: if (file_handle->handle.stream.closer && file_handle->handle.stream.handle) { file_handle->handle.stream.closer(file_handle->handle.stream.handle); } file_handle->handle.stream.handle = NULL ; break ; default : break ; } *file_handle = f; } } else if (phar->flags & PHAR_FILE_COMPRESSION_MASK) { zend_file_handle_dtor(file_handle); file_handle->type = ZEND_HANDLE_STREAM; file_handle->handle.stream.handle = phar; file_handle->handle.stream.reader = phar_zend_stream_reader; file_handle->handle.stream.closer = NULL ; file_handle->handle.stream.fsizer = phar_zend_stream_fsizer; file_handle->handle.stream.isatty = 0 ; phar->is_persistent ? php_stream_rewind(PHAR_G(cached_fp)[phar->phar_pos].fp) : php_stream_rewind(phar->fp); } } }
这里对phar的处理,就解释了为什么phar://phar.phar/xxx后面可以是任意的文件名了。
zend_language_scanner.l compile_file() 1 2 3 4 5 6 7 8 9 10 11 12 if (open_file_for_scanning(file_handle)==FAILURE) { if (!EG(exception)) { if (type==ZEND_REQUIRE) { zend_message_dispatcher(ZMSG_FAILED_REQUIRE_FOPEN, file_handle->filename); zend_bailout(); } else { zend_message_dispatcher(ZMSG_FAILED_INCLUDE_FOPEN, file_handle->filename); } } } else { op_array = zend_compile(ZEND_USER_FUNCTION); }
打开之前传入的文件,即获取文件内容,接着就是调用re2c和bison分别进行词法解析和语法解析,将代码解析成抽象语法树(AST)的过程了。
zend_operators.c is_equal_function() 1 2 3 4 5 ZEND_API int ZEND_FASTCALL is_equal_function (zval *result, zval *op1, zval *op2) { ZVAL_BOOL(result, zend_compare(op1, op2) == 0 ); return SUCCESS; }
此处主要是调用zend_compare()来进行比较
zend_operators.c zend_compare() 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 while (1 ) { switch (TYPE_PAIR(Z_TYPE_P(op1), Z_TYPE_P(op2))) { case TYPE_PAIR (IS_LONG, IS_LONG) : return Z_LVAL_P(op1)>Z_LVAL_P(op2)?1:(Z_LVAL_P(op1)<Z_LVAL_P(op2)?-1:0); case TYPE_PAIR (IS_DOUBLE, IS_LONG) : return ZEND_NORMALIZE_BOOL (Z_DVAL_P(op1) - (double )Z_LVAL_P(op2)) ; case TYPE_PAIR (IS_LONG, IS_DOUBLE) : return ZEND_NORMALIZE_BOOL ((double )Z_LVAL_P(op1) - Z_DVAL_P(op2)) ; case TYPE_PAIR (IS_DOUBLE, IS_DOUBLE) : if (Z_DVAL_P(op1) == Z_DVAL_P(op2)) { return 0 ; } else { return ZEND_NORMALIZE_BOOL(Z_DVAL_P(op1) - Z_DVAL_P(op2)); } case TYPE_PAIR (IS_ARRAY, IS_ARRAY) : return zend_compare_arrays (op1, op2) ; case TYPE_PAIR (IS_NULL, IS_NULL) : case TYPE_PAIR (IS_NULL, IS_FALSE) : case TYPE_PAIR (IS_FALSE, IS_NULL) : case TYPE_PAIR (IS_FALSE, IS_FALSE) : case TYPE_PAIR (IS_TRUE, IS_TRUE) : return 0 ; case TYPE_PAIR (IS_NULL, IS_TRUE) : return -1; case TYPE_PAIR (IS_TRUE, IS_NULL) : return 1 ; case TYPE_PAIR (IS_STRING, IS_STRING) : if (Z_STR_P(op1) == Z_STR_P(op2)) { return 0 ; } return zendi_smart_strcmp(Z_STR_P(op1), Z_STR_P(op2)); case TYPE_PAIR (IS_NULL, IS_STRING) : return Z_STRLEN_P (op2) == 0 ? 0 : -1 ; case TYPE_PAIR (IS_STRING, IS_NULL) : return Z_STRLEN_P (op1) == 0 ? 
0 : 1 ; case TYPE_PAIR (IS_OBJECT, IS_NULL) : return 1 ; case TYPE_PAIR (IS_NULL, IS_OBJECT) : return -1; default : if (Z_ISREF_P(op1)) { op1 = Z_REFVAL_P(op1); continue ; } else if (Z_ISREF_P(op2)) { op2 = Z_REFVAL_P(op2); continue ; } if (Z_TYPE_P(op1) == IS_OBJECT && Z_TYPE_P(op2) == IS_OBJECT && Z_OBJ_P(op1) == Z_OBJ_P(op2)) { return 0 ; } else if (Z_TYPE_P(op1) == IS_OBJECT) { return Z_OBJ_HANDLER_P(op1, compare)(op1, op2); } else if (Z_TYPE_P(op2) == IS_OBJECT) { return Z_OBJ_HANDLER_P(op2, compare)(op1, op2); } if (!converted) { if (Z_TYPE_P(op1) < IS_TRUE) { return zval_is_true(op2) ? -1 : 0 ; } else if (Z_TYPE_P(op1) == IS_TRUE) { return zval_is_true(op2) ? 0 : 1 ; } else if (Z_TYPE_P(op2) < IS_TRUE) { return zval_is_true(op1) ? 1 : 0 ; } else if (Z_TYPE_P(op2) == IS_TRUE) { return zval_is_true(op1) ? 0 : -1 ; } else { op1 = _zendi_convert_scalar_to_number(op1, &op1_copy); op2 = _zendi_convert_scalar_to_number(op2, &op2_copy); if (EG(exception)) { return 1 ; } converted = 1 ; } } else if (Z_TYPE_P(op1)==IS_ARRAY) { return 1 ; } else if (Z_TYPE_P(op2)==IS_ARRAY) { return -1 ; } else { ZEND_ASSERT(0 ); zend_throw_error(NULL , "Unsupported operand types" ); return 1 ; } } }
本次进入case TYPE_PAIR(IS_STRING, IS_STRING),然后跟进zendi_smart_strcmp()
zend_operators.c zendi_smart_strcmp() 1 2 if ((ret1 = is_numeric_string_ex(s1->val, s1->len, &lval1, &dval1, 0 , &oflow1)) && (ret2 = is_numeric_string_ex(s2->val, s2->len, &lval2, &dval2, 0 , &oflow2)))
跟进is_numeric_string_ex()
zend_operators.c _is_numeric_string_ex() 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 while (*str == ' ' || *str == '\t' || *str == '\n' || *str == '\r' || *str == '\v' || *str == '\f' ) { str++; length--; } ptr = str; if (*ptr == '-' ) { neg = 1 ; ptr++; } else if (*ptr == '+' ) { ptr++; } if (ZEND_IS_DIGIT(*ptr)) { while (*ptr == '0' ) { ptr++; } for (type = IS_LONG; !(digits >= MAX_LENGTH_OF_LONG && (dval || allow_errors == 1 )); digits++, ptr++) { check_digits: if (ZEND_IS_DIGIT(*ptr)) { tmp_lval = tmp_lval * 10 + (*ptr) - '0' ; continue ; } else if (*ptr == '.' && dp_or_e < 1 ) { goto process_double; } else if ((*ptr == 'e' || *ptr == 'E' ) && dp_or_e < 2 ) { const char *e = ptr + 1 ; if (*e == '-' || *e == '+' ) { ptr = e++; } if (ZEND_IS_DIGIT(*e)) { goto process_double; } } break ; }
这是这个函数开头对字符串进行的预处理:先跳过前导的空白字符(空格、\t、\n、\r、\v、\f),再处理正负号和前导的0,这些字符都会影响字符串转数字。处理完成以后再逐位检查数字,因为检查到了e,并且e后面跟着的是数字,于是跳转到process_double按浮点数处理,其中local_dval = zend_strtod(str, &ptr);
很关键,我们可以看看zend_strtod()的内容。
1 2 3 4 if (se) *se = (char *)s; return sign ? -dval(&rv) : dval(&rv); }
在两个字符串都执行完这一套流程后,就进行比较:"0e12331"和"0e4543"都被当作科学计数法解析成浮点数0(即0×10^12331和0×10^4543),两者数值相等,所以==最后返回true