Location (file name)

hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp

Description

// ---------------------------------------------------------------------------
// Generate a native wrapper for a given method.  The method takes arguments
// in the Java compiled code convention, marshals them to the native
// convention (handlizes oops, etc), transitions to native, makes the call,
// returns to java state (possibly blocking), unhandlizes any result and
// returns.

Function name

nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
                                                methodHandle method,
                                                int compile_id,
                                                int total_in_args,
                                                int comp_args_on_stack, // in VMRegStackSlots
                                                BasicType *in_sig_bt,
                                                VMRegPair *in_regs,
                                                BasicType ret_type) {

Body

  {- -------------------------------------------
  (1) Prepare the OopMapSet for the code that will be generated.
      ---------------------------------------- -}

      // Native nmethod wrappers never take possesion of the oop arguments.
      // So the caller will gc the arguments. The only thing we need an
      // oopMap for is if the call is static
      //
      // An OopMap for lock (and class if static), and one for the VM call itself
      OopMapSet *oop_maps = new OopMapSet();

  {- -------------------------------------------
  (1) (This is where the generated code begins.)
      (This is the start of the "unverified entry point" (uep).)
      ---------------------------------------- -}

      intptr_t start = (intptr_t)__ pc();

  {- -------------------------------------------
  (1) Code generation:
      「Perform the type check used for inline caching (= the "unverified entry point" handling).
       If the type does not match, jump to the stub stored in SharedRuntime::get_ic_miss_stub().
       If it matches, simply fall through.」
      ---------------------------------------- -}

      // First thing make an ic check to see if we should even be here
      {
        Label L;
        const Register temp_reg = G3_scratch;
        AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
        __ verify_oop(O0);
        __ load_klass(O0, temp_reg);
        __ cmp(temp_reg, G5_inline_cache_reg);
        __ brx(Assembler::equal, true, Assembler::pt, L);
        __ delayed()->nop();

        __ jump_to(ic_miss, temp_reg);
        __ delayed()->nop();
        __ align(CodeEntryAlignment);
        __ bind(L);
      }

  {- -------------------------------------------
  (1) (From here on is the "verified entry point" (vep).)
      ---------------------------------------- -}

      int vep_offset = ((intptr_t)__ pc()) - start;

  {- -------------------------------------------
  (1) Code generation: (only when COMPILER1 is defined)
      「For the Object.hashCode intrinsic, try to read the hash value directly from the object's
       mark word and return it with a leaf return.
       If the object is locked/biased or the hash has not been computed yet, fall through to
       slowCase, i.e. to the normal native call path below.」
      ---------------------------------------- -}

    #ifdef COMPILER1
      if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
        // Object.hashCode can pull the hashCode from the header word
        // instead of doing a full VM transition once it's been computed.
        // Since hashCode is usually polymorphic at call sites we can't do
        // this optimization at the call site without a lot of work.
        Label slowCase;
        Register receiver             = O0;
        Register result               = O0;
        Register header               = G3_scratch;
        Register hash                 = G3_scratch; // overwrite header value with hash value
        Register mask                 = G1;         // to get hash field from header

        // Read the header and build a mask to get its hash field.  Give up if the object is not unlocked.
        // We depend on hash_mask being at most 32 bits and avoid the use of
        // hash_mask_in_place because it could be larger than 32 bits in a 64-bit
        // vm: see markOop.hpp.
        __ ld_ptr(receiver, oopDesc::mark_offset_in_bytes(), header);
        __ sethi(markOopDesc::hash_mask, mask);
        __ btst(markOopDesc::unlocked_value, header);
        __ br(Assembler::zero, false, Assembler::pn, slowCase);
        if (UseBiasedLocking) {
          // Check if biased and fall through to runtime if so
          __ delayed()->nop();
          __ btst(markOopDesc::biased_lock_bit_in_place, header);
          __ br(Assembler::notZero, false, Assembler::pn, slowCase);
        }
        __ delayed()->or3(mask, markOopDesc::hash_mask & 0x3ff, mask);

        // Check for a valid (non-zero) hash code and get its value.
    #ifdef _LP64
        __ srlx(header, markOopDesc::hash_shift, hash);
    #else
        __ srl(header, markOopDesc::hash_shift, hash);
    #endif
        __ andcc(hash, mask, hash);
        __ br(Assembler::equal, false, Assembler::pn, slowCase);
        __ delayed()->nop();

        // leaf return.
        __ retl();
        __ delayed()->mov(hash, result);
        __ bind(slowCase);
      }
    #endif // COMPILER1


  {- -------------------------------------------
  (1) Call SharedRuntime::c_calling_convention() to compute the calling convention for the
      native method and the number of argument slots that do not fit in registers.
      ---------------------------------------- -}

      // We have received a description of where all the java arg are located
      // on entry to the wrapper. We need to convert these args to where
      // the jni function will expect them. To figure out where they go
      // we convert the java signature to a C signature by inserting
      // the hidden arguments as arg[0] and possibly arg[1] (static method)

      int total_c_args = total_in_args + 1;
      if (method->is_static()) {
        total_c_args++;
      }

      BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
      VMRegPair  * out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);

      int argc = 0;
      out_sig_bt[argc++] = T_ADDRESS;
      if (method->is_static()) {
        out_sig_bt[argc++] = T_OBJECT;
      }

      for (int i = 0; i < total_in_args ; i++ ) {
        out_sig_bt[argc++] = in_sig_bt[i];
      }

      // Now figure out where the args must be stored and how much stack space
      // they require (neglecting out_preserve_stack_slots but space for storing
      // the 1st six register arguments). It's weird see int_stk_helper.
      //
      int out_arg_slots;
      out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
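
  As a concrete illustration of the hidden-argument insertion above (a hypothetical example, not
  taken from the source): for a static native method such as static native int foo(int a, Object b),
  the two signature vectors would line up roughly as follows.

      // Hypothetical example: static native int foo(int a, Object b)
      //
      //   in_sig_bt  (Java view)       : [ T_INT, T_OBJECT ]                    total_in_args = 2
      //   out_sig_bt (native/JNI view) : [ T_ADDRESS,   // JNIEnv*      (hidden)
      //                                    T_OBJECT,    // class mirror (hidden, static only)
      //                                    T_INT,       // a
      //                                    T_OBJECT ]   // b                    total_c_args  = 4
      //
      // c_calling_convention() then assigns each out_sig_bt entry either to a SPARC outgoing
      // register (O0..O5) or to a stack location, and returns how many VMRegImpl stack slots of
      // outgoing argument space are needed (out_arg_slots).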

  {- -------------------------------------------
  (1) (Variable declarations, etc.)

      (This computes the stack frame size needed by the stub being generated here.
       * For a synchronized method, space for the BasicObjectLock is reserved as well.

       * Oops passed in registers have to be handlized here, otherwise the GC would lose track
         of them, so the frame also contains an area for allocating those Handles.
         (See the lines below "// Now the space for the inbound oop handle area".)

         Since this is SPARC, at most 6 arguments arrive in registers.
         The current implementation always reserves Handle space for 6 words, regardless of how
         many oop arguments there actually are, apparently so that the register-to-slot mapping
         stays simple.
         Later, in object_move(), a Handle is created there: the pointer value is stored into
         the Handle, and what gets passed as the argument is a pointer to the Handle rather than
         the raw pointer itself. (A conceptual sketch of this follows the argument shuffle loop
         below.)

       * The final +2 is temporary work space.
         It also appears to be used to temporarily save a float return value before the lock
         handling and the like.)
      ---------------------------------------- -}

      // Compute framesize for the wrapper.  We need to handlize all oops in
      // registers. We must create space for them here that is disjoint from
      // the windowed save area because we have no control over when we might
      // flush the window again and overwrite values that gc has since modified.
      // (The live window race)
      //
      // We always just allocate 6 word for storing down these object. This allow
      // us to simply record the base and use the Ireg number to decide which
      // slot to use. (Note that the reg number is the inbound number not the
      // outbound number).
      // We must shuffle args to match the native convention, and include var-args space.

      // Calculate the total number of stack slots we will need.

      // First count the abi requirement plus all of the outgoing args
      int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;

      // Now the space for the inbound oop handle area

      int oop_handle_offset = stack_slots;
      stack_slots += 6*VMRegImpl::slots_per_word;

      // Now any space we need for handlizing a klass if static method

      int oop_temp_slot_offset = 0;
      int klass_slot_offset = 0;
      int klass_offset = -1;
      int lock_slot_offset = 0;
      bool is_static = false;

      if (method->is_static()) {
        klass_slot_offset = stack_slots;
        stack_slots += VMRegImpl::slots_per_word;
        klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
        is_static = true;
      }

      // Plus a lock if needed

      if (method->is_synchronized()) {
        lock_slot_offset = stack_slots;
        stack_slots += VMRegImpl::slots_per_word;
      }

      // Now a place to save return value or as a temporary for any gpr -> fpr moves
      stack_slots += 2;

      // Ok The space we have allocated will look like:
      //
      //
      // FP-> |                     |
      //      |---------------------|
      //      | 2 slots for moves   |
      //      |---------------------|
      //      | lock box (if sync)  |
      //      |---------------------| <- lock_slot_offset
      //      | klass (if static)   |
      //      |---------------------| <- klass_slot_offset
      //      | oopHandle area      |
      //      |---------------------| <- oop_handle_offset
      //      | outbound memory     |
      //      | based arguments     |
      //      |                     |
      //      |---------------------|
      //      | vararg area         |
      //      |---------------------|
      //      |                     |
      // SP-> | out_preserved_slots |
      //
      //


      // Now compute actual number of stack words we need rounding to make
      // stack properly aligned.
      stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word);

      int stack_size = stack_slots * VMRegImpl::stack_slot_size;

  {- -------------------------------------------
  (1) Code generation:
     「Check for stack overflow, using the code generated by
       AbstractAssembler::generate_stack_overflow_check().」
      ---------------------------------------- -}

      // Generate stack overflow check before creating frame
      __ generate_stack_overflow_check(stack_size);

  {- -------------------------------------------
  (1) Code generation:
      「Allocate the stack frame with a save instruction (shift SP and switch/save the register window).」
      ---------------------------------------- -}

      // Generate a new frame for the wrapper.
      __ save(SP, -stack_size, SP);

  {- -------------------------------------------
  (1) (From here on is Frame_Complete.)
      ---------------------------------------- -}

      int frame_complete = ((intptr_t)__ pc()) - start;

  {- -------------------------------------------
  (1) Code generation: (verify)
      ---------------------------------------- -}

      __ verify_thread();


  {- -------------------------------------------
  (1) Code generation:
      「Move the arguments into place according to the native method's calling convention.」

       (Note that a JNI call, unlike an ordinary Java method call, has to add a JNIEnv* argument
        (and, for a static method, a class argument as well).
        Because of this, the positions never match, even when both sides pass their arguments
        in registers.)

        <= That said, on SPARC the save instruction has already shifted the O* registers into
        the I* registers, so the reshuffling is simple (accidentally overwriting an argument
        cannot happen).

       (An OopMap is also built here, recording where the pointers were stored.
        As a small trick, the OopMap is allocated with twice the computed stack size
        (stack_slots * 2), so that it can apparently also hold information about the caller's
        incoming oop args.)
        <= The finished OopMap appears to be passed to the nmethod constructor and stored
        inside it.
      ---------------------------------------- -}

      //
      // We immediately shuffle the arguments so that any vm call we have to
      // make from here on out (sync slow path, jvmti, etc.) we will have
      // captured the oops from our caller and have a valid oopMap for
      // them.

      // -----------------
      // The Grand Shuffle
      //
      // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
      // (derived from JavaThread* which is in L7_thread_cache) and, if static,
      // the class mirror instead of a receiver.  This pretty much guarantees that
      // register layout will not match.  We ignore these extra arguments during
      // the shuffle. The shuffle is described by the two calling convention
      // vectors we have in our possession. We simply walk the java vector to
      // get the source locations and the c vector to get the destinations.
      // Because we have a new window and the argument registers are completely
      // disjoint ( I0 -> O1, I1 -> O2, ...) we have nothing to worry about
      // here.

      // This is a trick. We double the stack slots so we can claim
      // the oops in the caller's frame. Since we are sure to have
      // more args than the caller doubling is enough to make
      // sure we can capture all the incoming oop args from the
      // caller.
      //
      OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
      int c_arg = total_c_args - 1;
      // Record sp-based slot for receiver on stack for non-static methods
      int receiver_offset = -1;

      // We move the arguments backward because the floating point registers
      // destination will always be to a register with a greater or equal register
      // number or the stack.

    #ifdef ASSERT
      bool reg_destroyed[RegisterImpl::number_of_registers];
      bool freg_destroyed[FloatRegisterImpl::number_of_registers];
      for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
        reg_destroyed[r] = false;
      }
      for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
        freg_destroyed[f] = false;
      }

    #endif /* ASSERT */

      for ( int i = total_in_args - 1; i >= 0 ; i--, c_arg-- ) {

    #ifdef ASSERT
        if (in_regs[i].first()->is_Register()) {
          assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "ack!");
        } else if (in_regs[i].first()->is_FloatRegister()) {
          assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)], "ack!");
        }
        if (out_regs[c_arg].first()->is_Register()) {
          reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
        } else if (out_regs[c_arg].first()->is_FloatRegister()) {
          freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)] = true;
        }
    #endif /* ASSERT */

        switch (in_sig_bt[i]) {
          case T_ARRAY:
          case T_OBJECT:
            object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
                        ((i == 0) && (!is_static)),
                        &receiver_offset);
            break;
          case T_VOID:
            break;

          case T_FLOAT:
            float_move(masm, in_regs[i], out_regs[c_arg]);
              break;

          case T_DOUBLE:
            assert( i + 1 < total_in_args &&
                    in_sig_bt[i + 1] == T_VOID &&
                    out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
            double_move(masm, in_regs[i], out_regs[c_arg]);
            break;

          case T_LONG :
            long_move(masm, in_regs[i], out_regs[c_arg]);
            break;

          case T_ADDRESS: assert(false, "found T_ADDRESS in java args");

          default:
            move32_64(masm, in_regs[i], out_regs[c_arg]);
        }
      }
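
  The helper object_move() called above is not shown in this listing. For an oop that arrives in a
  register, what it has to achieve is roughly the following (a conceptual sketch based on the
  description above, using hypothetical names Isrc, Odst and oop_slot; the real SPARC implementation
  also handles stack-resident oops and possibly-NULL oops).

      // Conceptual sketch only -- not the actual sharedRuntime_sparc.cpp code.
      // Handlize an oop arriving in register Isrc and hand it to native code in register Odst.
      //
      //   int offset = oop_slot * VMRegImpl::stack_slot_size;   // a slot inside the oop handle area
      //   __ st_ptr(Isrc, SP, offset + STACK_BIAS);             // spill the oop where GC can find/update it
      //   map->set_oop(VMRegImpl::stack2reg(oop_slot));         // record the slot in the OopMap
      //   if (is_receiver) *receiver_offset = offset;           // remember where the receiver went
      //   __ add(SP, offset + STACK_BIAS, Odst);                // pass the slot's address (i.e. a handle);
      //                                                         // NULL is passed instead if the oop is NULL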

  {- -------------------------------------------
  (1) Code generation: (generated only if the target method is a static method)
      「Set the class object in register O1, because it has to be passed as an argument
        (more precisely, create a Handle pointing to the class object and put that Handle in O1).」
      ---------------------------------------- -}

      // Pre-load a static method's oop into O1.  Used both by locking code and
      // the normal JNI call code.
      if (method->is_static()) {
        __ set_oop_constant(JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()), O1);

        // Now handlize the static class mirror in O1.  It's known not-null.
        __ st_ptr(O1, SP, klass_offset + STACK_BIAS);
        map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
        __ add(SP, klass_offset + STACK_BIAS, O1);
      }


  {- -------------------------------------------
  (1) Code generation: (generated only if the target method is a synchronized method)
      「Also copy (the handle of) the object to be locked into the L6_handle register.」
      ---------------------------------------- -}

      const Register L6_handle = L6;

      if (method->is_synchronized()) {
        __ mov(O1, L6_handle);
      }

  {- -------------------------------------------
  (1) Obtain the pc at this point and register it in the OopMapSet, associated with the OopMap
      built above.
      (At run time the pc also ends up in O7; how exactly is that used?? #TODO)
      ---------------------------------------- -}

      // We have all of the arguments setup at this point. We MUST NOT touch any Oregs
      // except O6/O7. So if we must call out we must push a new frame. We immediately
      // push a new frame and flush the windows.

    #ifdef _LP64
      intptr_t thepc = (intptr_t) __ pc();
      {
        address here = __ pc();
        // Call the next instruction
        __ call(here + 8, relocInfo::none);
        __ delayed()->nop();
      }
    #else
      intptr_t thepc = __ load_pc_address(O7, 0);
    #endif /* _LP64 */

      // We use the same pc/oopMap repeatedly when we call out
      oop_maps->add_gc_map(thepc - start, map);

      // O7 now has the pc loaded that we will use when we finally call to native.

  {- -------------------------------------------
  (1) Code generation:
      「Save the G2_thread register (into L7_thread_cache).」
      ---------------------------------------- -}

      // Save thread in L7; it crosses a bunch of VM calls below
      // Don't use save_thread because it smashes G2 and we merely
      // want to save a copy
      __ mov(G2_thread, L7_thread_cache);


  {- -------------------------------------------
  (1) (Variable declarations, etc.)
      (This variable makes sure that if an inner frame becomes necessary somewhere below,
       it is created only once and then reused, rather than being created over and over.)
      ---------------------------------------- -}

      // If we create an inner frame once is plenty
      // when we create it we must also save G2_thread
      bool inner_frame_created = false;

  {- -------------------------------------------
  (1) Code generation: (DTrace hook point) (See: SharedRuntime::dtrace_method_entry())
      ---------------------------------------- -}

      // dtrace method entry support
      {
        SkipIfEqual skip_if(
          masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);
        // create inner frame
        __ save_frame(0);
        __ mov(G2_thread, L7_thread_cache);
        __ set_oop_constant(JNIHandles::make_local(method()), O1);
        __ call_VM_leaf(L7_thread_cache,
             CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
             G2_thread, O1);
        __ restore();
      }

  {- -------------------------------------------
  (1) Code generation: (trace output) (See: SharedRuntime::rc_trace_method_entry())
      (Trace output related to JVMTI RedefineClasses().)
      ---------------------------------------- -}

      // RedefineClasses() tracing support for obsolete method entry
      if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
        // create inner frame
        __ save_frame(0);
        __ mov(G2_thread, L7_thread_cache);
        __ set_oop_constant(JNIHandles::make_local(method()), O1);
        __ call_VM_leaf(L7_thread_cache,
             CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
             G2_thread, O1);
        __ restore();
      }

  {- -------------------------------------------
  (1) (According to the comment,
       once the locking code below has finished we are normally in the jni frame,
       but if saved_frame (what is that?? #TODO) is true we are one frame deeper, in the
       "inner" frame.

       (If we had to go into the VM for locking, JVMTI handling, etc., we are in the inner frame.)
       (The arguments are in the O registers if in the jni frame, and in the I registers if in
        the inner frame.))
      ---------------------------------------- -}

      // We are in the jni frame unless saved_frame is true in which case
      // we are in one frame deeper (the "inner" frame). If we are in the
      // "inner" frames the args are in the Iregs and if the jni frame then
      // they are in the Oregs.
      // If we ever need to go to the VM (for locking, jvmti) then
      // we will always be in the "inner" frame.

  {- -------------------------------------------
  (1) Code generation: (generated only if the target method is a synchronized method)
      「Acquire the lock.
        (First, try to acquire it with the code generated by MacroAssembler::compiler_lock_object().
        If that fails, call SharedRuntime::complete_monitor_locking_C() to acquire the lock.)」
      ---------------------------------------- -}

      // Lock a synchronized method
      int lock_offset = -1;         // Set if locked
      if (method->is_synchronized()) {
        Register Roop = O1;
        const Register L3_box = L3;

        create_inner_frame(masm, &inner_frame_created);

        __ ld_ptr(I1, 0, O1);
        Label done;

        lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
        __ add(FP, lock_offset+STACK_BIAS, L3_box);
    #ifdef ASSERT
        if (UseBiasedLocking) {
          // making the box point to itself will make it clear it went unused
          // but also be obviously invalid
          __ st_ptr(L3_box, L3_box, 0);
        }
    #endif // ASSERT
        //
        // Compiler_lock_object (Roop, Rmark, Rbox, Rscratch) -- kills Rmark, Rbox, Rscratch
        //
        __ compiler_lock_object(Roop, L1,    L3_box, L2);
        __ br(Assembler::equal, false, Assembler::pt, done);
        __ delayed() -> add(FP, lock_offset+STACK_BIAS, L3_box);


        // None of the above fast optimizations worked so we have to get into the
        // slow case of monitor enter.  Inline a special case of call_VM that
        // disallows any pending_exception.
        __ mov(Roop, O0);            // Need oop in O0
        __ mov(L3_box, O1);

        // Record last_Java_sp, in case the VM code releases the JVM lock.

        __ set_last_Java_frame(FP, I7);

        // do the call
        __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type);
        __ delayed()->mov(L7_thread_cache, O2);

        __ restore_thread(L7_thread_cache); // restore G2_thread
        __ reset_last_Java_frame();

    #ifdef ASSERT
        { Label L;
        __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
        __ br_null(O0, false, Assembler::pt, L);
        __ delayed()->nop();
        __ stop("no pending exception allowed on exit from IR::monitorenter");
        __ bind(L);
        }
    #endif
        __ bind(done);
      }


  {- -------------------------------------------
  (1) Code generation:
      「Perform the actual call to the native method.

        Before the call, the following is done:
        * Flush the register windows  (<= so that the stack can always be walked if something
          happens?? #TODO)
        * Discard the inner frame if one was created.
        * Set up the JNIEnv* argument.
        * Set the current SP and O7 into the JavaFrameAnchor  (<= presumably because they have to
          match what was registered in the OopMapSet(?). Where is this actually referenced... #TODO)
        * Change the thread state to _thread_in_native.」
      ---------------------------------------- -}

      // Finally just about ready to make the JNI call

      __ flush_windows();
      if (inner_frame_created) {
        __ restore();
      } else {
        // Store only what we need from this frame
        // QQQ I think that non-v9 (like we care) we don't need these saves
        // either as the flush traps and the current window goes too.
        __ st_ptr(FP, SP, FP->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
        __ st_ptr(I7, SP, I7->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
      }

      // get JNIEnv* which is first argument to native

      __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);

      // Use that pc we placed in O7 a while back as the current frame anchor

      __ set_last_Java_frame(SP, O7);

      // Transition from _thread_in_Java to _thread_in_native.
      __ set(_thread_in_native, G3_scratch);
      __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());

      // We flushed the windows ages ago now mark them as flushed

      // mark windows as flushed
      __ set(JavaFrameAnchor::flushed, G3_scratch);

      Address flags(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());

    #ifdef _LP64
      AddressLiteral dest(method->native_function());
      __ relocate(relocInfo::runtime_call_type);
      __ jumpl_to(dest, O7, O7);
    #else
      __ call(method->native_function(), relocInfo::runtime_call_type);
    #endif
      __ delayed()->st(G3_scratch, flags);

  {- -------------------------------------------
  (1) Code generation:
      「Restore G2_thread (the native method runs outside HotSpot, so the register may have been clobbered).」
      ---------------------------------------- -}

      __ restore_thread(L7_thread_cache); // restore G2_thread

  {- -------------------------------------------
  (1) Code generation:
      「Move the native method's return value into place according to the Java VM's internal
       calling convention.」
      ---------------------------------------- -}

      // Unpack native results.  For int-types, we do any needed sign-extension
      // and move things into I0.  The return value there will survive any VM
      // calls for blocking or unlocking.  An FP or OOP result (handle) is done
      // specially in the slow-path code.
      switch (ret_type) {
      case T_VOID:    break;        // Nothing to do!
      case T_FLOAT:   break;        // Got it where we want it (unless slow-path)
      case T_DOUBLE:  break;        // Got it where we want it (unless slow-path)
      // In 64 bits build result is in O0, in O0, O1 in 32bit build
      case T_LONG:
    #ifndef _LP64
                      __ mov(O1, I1);
    #endif
                      // Fall thru
      case T_OBJECT:                // Really a handle
      case T_ARRAY:
      case T_INT:
                      __ mov(O0, I0);
                      break;
      case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, I0); break; // !0 => true; 0 => false
      case T_BYTE   : __ sll(O0, 24, O0); __ sra(O0, 24, I0);   break;
      case T_CHAR   : __ sll(O0, 16, O0); __ srl(O0, 16, I0);   break; // cannot use and3, 0xFFFF too big as immediate value!
      case T_SHORT  : __ sll(O0, 16, O0); __ sra(O0, 16, I0);   break;
        break;                      // Cannot de-handlize until after reclaiming jvm_lock
      default:
        ShouldNotReachHere();
      }

  {- -------------------------------------------
  (1) Code generation:
      「If a safepoint has been initiated, call
       JavaThread::check_special_condition_for_native_trans() and block here.」

       (This is detected by checking SafepointSynchronize::_state.)

       (Note that the thread state is changed to _thread_in_native_trans immediately before
        the check.
        Furthermore, on an MP system the change has to be made visible to the VM thread
        (sequential consistency), so after the state change one of the following is done
        (See: SafepointSynchronize::begin()):
        * If the UseMembar option is specified:
          「Issue a StoreLoad memory barrier (the generated-code counterpart of
           OrderAccess::fence()).」
        * Otherwise:
          「Write to the serialize page (the generated-code counterpart of
           InterfaceSupport::serialize_memory()).」

        Without this, the check could slip through if it raced with the VM thread's update
        of _state.
        Done in this order, even if there is a race the VM thread will still notice this thread
        and set its suspend_flags.
        Therefore, checking both SafepointSynchronize::_state and Thread::_suspend_flags is
        enough to guarantee that nothing slips through.)
      ---------------------------------------- -}

      // must we block?

      // Block, if necessary, before resuming in _thread_in_Java state.
      // In order for GC to work, don't clear the last_Java_sp until after blocking.
      { Label no_block;
        AddressLiteral sync_state(SafepointSynchronize::address_of_state());

        // Switch thread to "native transition" state before reading the synchronization state.
        // This additional state is necessary because reading and testing the synchronization
        // state is not atomic w.r.t. GC, as this scenario demonstrates:
        //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
        //     VM thread changes sync state to synchronizing and suspends threads for GC.
        //     Thread A is resumed to finish this native method, but doesn't block here since it
        //     didn't see any synchronization is progress, and escapes.
        __ set(_thread_in_native_trans, G3_scratch);
        __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
        if(os::is_MP()) {
          if (UseMembar) {
            // Force this write out before the read below
            __ membar(Assembler::StoreLoad);
          } else {
            // Write serialization page so VM thread can do a pseudo remote membar.
            // We use the current thread pointer to calculate a thread specific
            // offset to write to within the page. This minimizes bus traffic
            // due to cache line collision.
            __ serialize_memory(G2_thread, G1_scratch, G3_scratch);
          }
        }
        __ load_contents(sync_state, G3_scratch);
        __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized);

        Label L;
        Address suspend_state(G2_thread, JavaThread::suspend_flags_offset());
        __ br(Assembler::notEqual, false, Assembler::pn, L);
        __ delayed()->ld(suspend_state, G3_scratch);
        __ cmp(G3_scratch, 0);
        __ br(Assembler::equal, false, Assembler::pt, no_block);
        __ delayed()->nop();
        __ bind(L);

        // Block.  Save any potential method result value before the operation and
        // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
        // lets us share the oopMap we used when we went native rather the create
        // a distinct one for this pc
        //
        save_native_result(masm, ret_type, stack_slots);
        __ call_VM_leaf(L7_thread_cache,
                        CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
                        G2_thread);

        // Restore any method result value
        restore_native_result(masm, ret_type, stack_slots);
        __ bind(no_block);
      }

  {- -------------------------------------------
  (1) Code generation:
      「Set the current thread's thread state (the JavaThread::_thread_state field) back to _thread_in_Java.」
      ---------------------------------------- -}

      // thread state is thread_in_native_trans. Any safepoint blocking has already
      // happened so we can now change state to _thread_in_Java.


      __ set(_thread_in_Java, G3_scratch);
      __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());


  {- -------------------------------------------
  (1) Code generation:
      「If the yellow guard page of the stack has been disabled, call
       SharedRuntime::reguard_yellow_pages() to restore it.」
      ---------------------------------------- -}

      Label no_reguard;
      __ ld(G2_thread, JavaThread::stack_guard_state_offset(), G3_scratch);
      __ cmp(G3_scratch, JavaThread::stack_guard_yellow_disabled);
      __ br(Assembler::notEqual, false, Assembler::pt, no_reguard);
      __ delayed()->nop();

        save_native_result(masm, ret_type, stack_slots);
      __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
      __ delayed()->nop();

      __ restore_thread(L7_thread_cache); // restore G2_thread
        restore_native_result(masm, ret_type, stack_slots);

      __ bind(no_reguard);

  {- -------------------------------------------
  (1) Code generation: (generated only if the target method is a synchronized method)
      「Release the lock that was acquired.
        (First, try to release it with the code generated by MacroAssembler::compiler_unlock_object().
        If that fails, call SharedRuntime::complete_monitor_unlocking_C() to release the lock.)」
      ---------------------------------------- -}

      // Handle possible exception (will unlock if necessary)

      // native result if any is live in freg or I0 (and I1 if long and 32bit vm)

      // Unlock
      if (method->is_synchronized()) {
        Label done;
        Register I2_ex_oop = I2;
        const Register L3_box = L3;
        // Get locked oop from the handle we passed to jni
        __ ld_ptr(L6_handle, 0, L4);
        __ add(SP, lock_offset+STACK_BIAS, L3_box);
        // Must save pending exception around the slow-path VM call.  Since it's a
        // leaf call, the pending exception (if any) can be kept in a register.
        __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), I2_ex_oop);
        // Now unlock
        //                       (Roop, Rmark, Rbox,   Rscratch)
        __ compiler_unlock_object(L4,   L1,    L3_box, L2);
        __ br(Assembler::equal, false, Assembler::pt, done);
        __ delayed()-> add(SP, lock_offset+STACK_BIAS, L3_box);

        // save and restore any potential method result value around the unlocking
        // operation.  Will save in I0 (or stack for FP returns).
        save_native_result(masm, ret_type, stack_slots);

        // Must clear pending-exception before re-entering the VM.  Since this is
        // a leaf call, pending-exception-oop can be safely kept in a register.
        __ st_ptr(G0, G2_thread, in_bytes(Thread::pending_exception_offset()));

        // slow case of monitor enter.  Inline a special case of call_VM that
        // disallows any pending_exception.
        __ mov(L3_box, O1);

        __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), relocInfo::runtime_call_type);
        __ delayed()->mov(L4, O0);              // Need oop in O0

        __ restore_thread(L7_thread_cache); // restore G2_thread

    #ifdef ASSERT
        { Label L;
        __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
        __ br_null(O0, false, Assembler::pt, L);
        __ delayed()->nop();
        __ stop("no pending exception allowed on exit from IR::monitorexit");
        __ bind(L);
        }
    #endif
        restore_native_result(masm, ret_type, stack_slots);
        // check_forward_pending_exception jump to forward_exception if any pending
        // exception is set.  The forward_exception routine expects to see the
        // exception in pending_exception and not in a register.  Kind of clumsy,
        // since all folks who branch to forward_exception must have tested
        // pending_exception first and hence have it in a register already.
        __ st_ptr(I2_ex_oop, G2_thread, in_bytes(Thread::pending_exception_offset()));
        __ bind(done);
      }

  {- -------------------------------------------
  (1) Code generation: (DTrace hook point) (See: SharedRuntime::dtrace_method_exit())
      ---------------------------------------- -}

      // Tell dtrace about this method exit
      {
        SkipIfEqual skip_if(
          masm, G3_scratch, &DTraceMethodProbes, Assembler::zero);
        save_native_result(masm, ret_type, stack_slots);
        __ set_oop_constant(JNIHandles::make_local(method()), O1);
        __ call_VM_leaf(L7_thread_cache,
           CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
           G2_thread, O1);
        restore_native_result(masm, ret_type, stack_slots);
      }

  {- -------------------------------------------
  (1) Code generation:
      「Clear the current thread's Java frame anchor (the JavaThread::_anchor field).」
      ---------------------------------------- -}

      // Clear "last Java frame" SP and PC.
      __ verify_thread(); // G2_thread must be correct
      __ reset_last_Java_frame();

  {- -------------------------------------------
  (1) Code generation: (generated only if the return type is an oop (object or array))
      「The return value comes back as a JNI handle, so extract the actual oop from it.
       (If the return value is NULL, however, it is left as NULL.)」
      ---------------------------------------- -}

      // Unpack oop result
      if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
          Label L;
          __ addcc(G0, I0, G0);
          __ brx(Assembler::notZero, true, Assembler::pt, L);
          __ delayed()->ld_ptr(I0, 0, I0);
          __ mov(G0, I0);
          __ bind(L);
          __ verify_oop(I0);
      }

  {- -------------------------------------------
  (1) Code generation:
      「Release the JNI local handles.
       (More concretely, set JNIHandleBlock::_top to 0.
        The JNIHandleBlock then no longer references those objects, so they can be reclaimed by GC.)」
      ---------------------------------------- -}

      // reset handle block
      __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5);
      __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes());
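
  In C++ terms, the two instructions above correspond roughly to the following (conceptual only;
  the generated code works directly on the field offset, not through this API):

      //   JNIHandleBlock* handles = thread->active_handles();
      //   handles->_top = 0;   // every JNI local handle created during the call becomes unreachable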

  {- -------------------------------------------
  (1) Code generation:
      「Using the code generated by check_forward_pending_exception(),
        examine the current thread's pending_exception field to check whether an exception was
        raised during the native method call.
        If it is non-zero, an exception was raised, so jump to the code pointed to by
        StubRoutines::forward_exception_entry().
        (If it is 0, simply fall through.)」
      ---------------------------------------- -}

      __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch);
      check_forward_pending_exception(masm, G3_scratch);
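
  For reference, the check emitted by check_forward_pending_exception() amounts to roughly the
  following (a conceptual sketch; the real helper is defined earlier in sharedRuntime_sparc.cpp,
  and G3_scratch here holds the pending_exception value loaded above):

      //   Label no_exception;
      //   __ br_null(G3_scratch, false, Assembler::pt, no_exception);  // pending_exception == NULL ?
      //   __ delayed()->nop();
      //   // non-NULL: jump to StubRoutines::forward_exception_entry(), which dispatches the
      //   //           exception to the proper handler
      //   __ bind(no_exception);                                        // NULL: fall through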


  {- -------------------------------------------
  (1) Code generation:
      「Return.」
      ---------------------------------------- -}

      // Return

    #ifndef _LP64
      if (ret_type == T_LONG) {

        // Must leave proper result in O0,O1 and G1 (c2/tiered only)
        __ sllx(I0, 32, G1);          // Shift bits into high G1
        __ srl (I1, 0, I1);           // Zero extend O1 (harmless?)
        __ or3 (I1, G1, G1);          // OR 64 bits into G1
      }
    #endif

      __ ret();
      __ delayed()->restore();

  {- -------------------------------------------
  (1) Flush the code generated above.
      ---------------------------------------- -}

      __ flush();

  {- -------------------------------------------
  (1) Call nmethod::new_native_nmethod() to register the generated code.
      ---------------------------------------- -}

      nmethod *nm = nmethod::new_native_nmethod(method,
                                                compile_id,
                                                masm->code(),
                                                vep_offset,
                                                frame_complete,
                                                stack_slots / VMRegImpl::slots_per_word,
                                                (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
                                                in_ByteSize(lock_offset),
                                                oop_maps);

  {- -------------------------------------------
  (1) Return the result.
      ---------------------------------------- -}

      return nm;

    }

This document is available under the GNU GENERAL PUBLIC LICENSE Version 2.