Skip to the content.

AST 到 字节码

目录

相关位置文件

我们来尝试从之前的 语法树 中生成对应的字节码

前面提到过的 pythonrun 的调用栈如下

pythonrun

这次关注的是 PyAST_CompileObject 这个函数

/*
 * Compile an AST module into a code object.
 *
 * Steps visible in this excerpt:
 *   1. _PyAST_Optimize() runs optimizations over the AST.
 *   2. PySymtable_BuildObject() builds the symbol table for the module.
 *   3. compiler_mod() produces the resulting code object.
 *
 * Returns the new code object, or NULL. The assert before the return
 * requires that a NULL result is always accompanied by a pending exception.
 */
PyCodeObject *
PyAST_CompileObject(mod_ty mod, PyObject *filename, PyCompilerFlags *flags,
                   int optimize, PyArena *arena)
{
    struct compiler c;
    PyCodeObject *co = NULL;
    PyCompilerFlags local_flags;
    int merged;
    // ... omitted in this excerpt (presumably where c, c.c_optimize and
    // c.c_future are initialised and local_flags/merged are used) ...

    /* A false result from the optimizer aborts compilation; co stays NULL. */
    if (!_PyAST_Optimize(mod, arena, c.c_optimize)) {
        goto finally;
    }

    c.c_st = PySymtable_BuildObject(mod, filename, c.c_future);
    if (c.c_st == NULL) {
        /* Make sure an exception is set even if the symtable builder
           failed without raising one. */
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_SystemError, "no symtable");
        goto finally;
    }

    co = compiler_mod(&c, mod);

 finally:
    /* Reached on both success and failure: the compiler state is always freed. */
    compiler_free(&c);
    assert(co || PyErr_Occurred());
    return co;
}

optimize

_PyAST_Optimize 会针对AST做一些优化, 比如常量折叠

/*
 * Try to constant-fold a BinOp node.
 *
 * Nothing is folded (and 1 is returned) unless both operands are Constant
 * nodes and the operator is one of the cases handled below. Otherwise the
 * operator is applied to the two constant values and the outcome is handed
 * to make_const(), whose return value becomes this function's result.
 */
static int
fold_binop(expr_ty node, PyArena *arena, int optimize)
{
    expr_ty left = node->v.BinOp.left;
    expr_ty right = node->v.BinOp.right;

    /* Both sides must already be constants for folding to apply. */
    if (!(left->kind == Constant_kind && right->kind == Constant_kind)) {
        return 1;
    }

    PyObject *left_value = left->v.Constant.value;
    PyObject *right_value = right->v.Constant.value;
    PyObject *folded;

    /* Mult, Mod, Pow and LShift go through safe_* helpers rather than the
       PyNumber_* API. NOTE(review): presumably to bound the size/cost of
       the folded result -- the helpers are not shown here; confirm. */
    switch (node->v.BinOp.op) {
    case Add:      folded = PyNumber_Add(left_value, right_value);         break;
    case Sub:      folded = PyNumber_Subtract(left_value, right_value);    break;
    case Mult:     folded = safe_multiply(left_value, right_value);        break;
    case Div:      folded = PyNumber_TrueDivide(left_value, right_value);  break;
    case FloorDiv: folded = PyNumber_FloorDivide(left_value, right_value); break;
    case Mod:      folded = safe_mod(left_value, right_value);             break;
    case Pow:      folded = safe_power(left_value, right_value);           break;
    case LShift:   folded = safe_lshift(left_value, right_value);          break;
    case RShift:   folded = PyNumber_Rshift(left_value, right_value);      break;
    case BitOr:    folded = PyNumber_Or(left_value, right_value);          break;
    case BitXor:   folded = PyNumber_Xor(left_value, right_value);         break;
    case BitAnd:   folded = PyNumber_And(left_value, right_value);         break;
    default:
        /* Unknown operator: leave the node untouched. */
        return 1;
    }

    /* NOTE(review): folded is passed on unchecked, so a failed operation
       (NULL result) is presumably handled inside make_const() -- confirm. */
    return make_const(node, folded, arena);
}

比如

a = 3 + 4

AST 表示如下

ast_before

fold_binop 之后, 变成了

ast_after

这个常量折叠的方法同样会做如下操作

symtable

_PyAST_Optimize 之后, PySymtable_BuildObject 会遍历 AST 并把 function/class/name 等定义/声明的变量加到一个 Python 字典对象中

symtable

CodeObject

compiler_mod 会遍历这个 AST, 并且生成对应的指令集和所需要的数据, 之后生成对应的 PyCodeObject 对象

/*
 * Excerpt of the expression visitor: only the Constant case is shown, the
 * surrounding switch and the other cases are elided.
 * For a Constant node, the node's value is passed to ADDOP_LOAD_CONST.
 * NOTE(review): ADDOP_LOAD_CONST is presumably a macro wrapping
 * compiler_addop_load_const() -- its definition is not shown here.
 */
static int
compiler_visit_expr1(struct compiler *c, expr_ty e)
{
      // ...
      case Constant_kind:
        ADDOP_LOAD_CONST(c, e->v.Constant.value);
        break;
      // ...
}

/*
 * Emit a LOAD_CONST instruction for the constant `o`.
 *
 * The constant is first registered through compiler_add_const(), which
 * stores it in the c->u->u_consts dict and yields its index; that index
 * then becomes the argument of the LOAD_CONST instruction.
 *
 * Returns 0 if the constant could not be registered, otherwise the result
 * of compiler_addop_i().
 */
static int
compiler_addop_load_const(struct compiler *c, PyObject *o)
{
    const Py_ssize_t const_index = compiler_add_const(c, o);

    /* A negative index signals failure; otherwise append `LOAD_CONST index`
       as the next instruction. */
    return (const_index < 0) ? 0 : compiler_addop_i(c, LOAD_CONST, const_index);
}

// The functions below can be found in Python/compile.c

/*
 * Append an instruction with an integer argument to the current block.
 *
 * opcode: the opcode to store in the new instruction.
 * oparg:  the argument; narrowed from Py_ssize_t to int before being stored.
 *
 * Returns 1 on success, 0 if no instruction slot could be obtained.
 */
static int
compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg)
{
    struct instr *i;
    int off;

    // ...
  
    /* Get the offset of the next instruction slot in the current block;
       a negative offset means failure. */
    off = compiler_next_instr(c, c->u->u_curblock);
    if (off < 0)
        return 0;
    i = &c->u->u_curblock->b_instr[off];
    i->i_opcode = opcode;
    i->i_oparg = Py_SAFE_DOWNCAST(oparg, Py_ssize_t, int);
    /* Record line-number information for the instruction at `off`. */
    compiler_set_lineno(c, off);
    return 1;
}

/*
 * Register the constant `o` in the current unit's constant dict
 * (c->u->u_consts) and return its index.
 *
 * The dict key is not `o` itself but whatever merge_consts_recursive()
 * returns for it; the reference to that key is released here once
 * compiler_add_o() has been called.
 *
 * Returns -1 if the key could not be produced, otherwise the result of
 * compiler_add_o().
 */
static Py_ssize_t
compiler_add_const(struct compiler *c, PyObject *o)
{
    PyObject *merged_key = merge_consts_recursive(c, o);
    if (!merged_key) {
        return -1;
    }

    const Py_ssize_t const_index = compiler_add_o(c, c->u->u_consts, merged_key);
    Py_DECREF(merged_key);
    return const_index;
}

/*
 * Look up `o` in `dict` and return the integer stored for it; if it is not
 * present yet, insert it with the dict's current size as its value and
 * return that size.
 *
 * Returns -1 when the lookup raised, the integer object could not be
 * created, or the insertion failed.
 */
static Py_ssize_t
compiler_add_o(struct compiler *c, PyObject *dict, PyObject *o)
{
    /* The lookup result is never released in this function (the C API
       documents it as a borrowed reference). */
    PyObject *existing = PyDict_GetItemWithError(dict, o);
    if (existing != NULL) {
        return PyLong_AsLong(existing);
    }

    /* NULL means either "missing" or "lookup raised"; tell them apart. */
    if (PyErr_Occurred()) {
        return -1;
    }

    /* New entry: its index is the number of entries stored so far. */
    Py_ssize_t next_index = PyDict_GET_SIZE(dict);
    PyObject *boxed_index = PyLong_FromSsize_t(next_index);
    if (boxed_index == NULL) {
        return -1;
    }

    /* Our reference is dropped whether or not the insertion succeeded. */
    int status = PyDict_SetItem(dict, o, boxed_index);
    Py_DECREF(boxed_index);
    if (status < 0) {
        return -1;
    }
    return next_index;
}

常量在上述操作中是以字典形式存储的, 以 a = 3 + 4 为例, 大致为 {7: 0, None: 1}: 键由常量对应的 PyObject 经 merge_consts_recursive 处理后得到, 值表示该常量在常量表中的下标 (也就是 LOAD_CONST 指令的参数)

之后, Python/compile.c 中的 consts_dict_keys_inorder 函数会按照值 (下标) 的顺序, 把这个字典的键转换成对应的列表对象

最终, Python/compile.c 中的 makecode 函数会把上述的指令和对应的数据结构存储到新建的 PyCodeObject 对象中, 生成的字节码如下

  1           0 LOAD_CONST               0 (7)
              2 STORE_NAME               0 (a)
              4 LOAD_CONST               1 (None)
              6 RETURN_VALUE

read more