Hi @chuanzhen, the problem actually interested me, so I took some personal time over the weekend to run some tests.
I set up 2 test scenes. The first one was a cube with 163970 points driven by 3 joints:
Your original source code: 0.53 fps
Optimized Python: 1.09 fps (x2.05)
C++: 19.36 fps (x36.52)
The other one was the same cube, but driven by 11 joints:
Your original source code: 0.37 fps
Optimized Python: 0.64 fps (x1.73)
C++: 12.08 fps (x32.65)
So here is the optimized Python code. As you can see, I try to avoid doing operations inside the nested loop as much as possible.
class newskin(plugins.ObjectData):
    """Object plugin that repositions the points of the deformed object from joint weights.

    The weight tag is read from the ``c4d.NEWSKINTAG`` link and a second object
    from the ``c4d.NEWSKINCHECK`` link.  ``ModifyObject`` stores the dirty
    counts it saw so that ``CheckDirty`` can later tell whether the weights,
    any joint matrix, or the linked object's cache changed.
    """

    lasttagdirty = None   # DATA dirty count of the weight tag at the last ModifyObject
    jointdir = None       # sum of the joints' MATRIX dirty counts at the last ModifyObject
    cachedir = None       # CACHE dirty count of the NEWSKINCHECK object at the last ModifyObject

    def CheckDirty(self, op, doc):
        """Flag ``op`` as DATA-dirty when any input of the deformation changed.

        :param op: the plugin object holding the NEWSKINTAG / NEWSKINCHECK links.
        :param doc: the document (unused here).
        """
        tag = op[c4d.NEWSKINTAG]

        # First run: no weight tag linked yet.
        if tag is None:
            op.SetDirty(c4d.DIRTYFLAGS_DATA)
            return

        # Weights changed since the last ModifyObject.
        if tag.GetDirty(c4d.DIRTYFLAGS_DATA) != self.lasttagdirty:
            op.SetDirty(c4d.DIRTYFLAGS_DATA)
            return

        # ModifyObject does nothing while this link is empty, so there is no
        # stored state to compare against (and nothing to call GetDirty on).
        check = op[c4d.NEWSKINCHECK]
        if check is None:
            return

        # Joint matrices changed: compare the summed dirty counts.
        cdirty = 0
        for i in xrange(tag.GetJointCount()):
            joint = tag.GetJoint(i)
            if joint is not None:   # skip joint slots that do not resolve to an object
                cdirty += joint.GetDirty(c4d.DIRTYFLAGS_MATRIX)

        if cdirty != self.jointdir or self.cachedir != check.GetDirty(c4d.DIRTYFLAGS_CACHE):
            op.SetDirty(c4d.DIRTYFLAGS_DATA)

    def ModifyObject(self, mod, doc, op, op_mg, mod_mg, lod, flags, thread):
        """Write the weighted joint transformation of every point back to ``op``.

        :param mod: the plugin object (holds the NEWSKINTAG / NEWSKINCHECK links).
        :param op: the object whose points are rewritten.
        :param op_mg: global matrix of ``op``.
        :return: always ``True``; when either link is empty the points are left untouched.
        """
        tag = mod[c4d.NEWSKINTAG]
        check = mod[c4d.NEWSKINCHECK]
        if tag is None or check is None:
            return True

        # Remember the state this evaluation is based on (read back by CheckDirty).
        self.lasttagdirty = tag.GetDirty(c4d.DIRTYFLAGS_DATA)
        self.cachedir = check.GetDirty(c4d.DIRTYFLAGS_CACHE)

        # Everything that depends only on the joint is computed once here
        # instead of once per (point, joint) pair.
        jointdir = 0
        skin_mats = []   # (joint index, current joint matrix * rest-state "m_bMi" matrix)
        for m in xrange(tag.GetJointCount()):
            joint = tag.GetJoint(m)
            if joint is None:   # skip joint slots that do not resolve to an object
                continue
            jointdir += joint.GetDirty(c4d.DIRTYFLAGS_MATRIX)
            skin_mats.append((m, joint.GetMg() * tag.GetJointRestState(m)["m_bMi"]))
        self.jointdir = jointdir

        inv_op_mg = ~op_mg
        get_weight = tag.GetWeight
        plist = op.GetAllPoints()

        for n, local_pos in enumerate(plist):   # n: point index
            world_pos = local_pos * op_mg
            deformed = c4d.Vector()             # fresh accumulator for every point

            for m, skin in skin_mats:
                weight = get_weight(m, n)
                if not weight:
                    continue
                deformed += weight * skin * world_pos   # weighted global position

            plist[n] = inv_op_mg * deformed     # back into the local space of op

        op.SetAllPoints(plist)
        op.Message(c4d.MSG_UPDATE)

        return True
And here is the C++ version (compatible with R20 only). It uses ParallelFor; I am not sure it is really worth it, since I did not do proper profiling, but I wanted to try it. Moreover, please do not consider my C++ code as fully optimized, since you could probably improve it depending on the situation.
#include "c4d_symbols.h"
#include "main.h"
#include "c4d_objectdata.h"
#include "lib_ca.h"
// Local resources
#include "oskinmodifier.h"
#include "maxon/parallelfor.h"
#include "maxon/basearray.h"
/// Unique plugin ID used to register this object and to create new instances of it.
/// A real ID must be obtained from http://www.plugincafe.com.
static constexpr Int32 ID_OBJECTDATA_SKINMODIFIER = 1000002;
class SkinModifier : public ObjectData
{
	INSTANCEOF(SkinModifier, ObjectData)
public:
	static NodeData* Alloc() { return NewObj(SkinModifier) iferr_ignore("SkinModifier plugin not instanced"); }
	virtual void CheckDirty(BaseObject *op, BaseDocument *doc);
	virtual Bool ModifyObject(BaseObject* mod, BaseDocument* doc, BaseObject* op, const Matrix& op_mg, const Matrix& mod_mg, Float lod, Int32 flags, BaseThread* thread);
private:
	BaseList2D* GetBaseLink(BaseObject* op, DescID id, Int excepted);
	Int lasttagdirty; /// get current weight tag dirty
	Int jointdir;
	Int cachedir;
};
/// Marks @p op as DATA-dirty when the weight tag, any joint matrix, or the
/// cache of the NEWSKINCHECK object changed since the last ModifyObject().
/// Does nothing while either link is empty.
/// @param[in] op   The plugin object holding the NEWSKINTAG / NEWSKINCHECK links.
/// @param[in] doc  The document (unused; the tag's own document resolves the joints).
void SkinModifier::CheckDirty(BaseObject *op, BaseDocument *doc)
{
	if (!op)
		return;
	BaseList2D* t = GetBaseLink(op, DescID(NEWSKINTAG), Tweights);
	BaseList2D* l = GetBaseLink(op, DescID(NEWSKINCHECK), Opolygon);
	if (!t || !l)
		return;
	PointObject* linkOp = static_cast<PointObject*>(l);
	CAWeightTag* tag = static_cast<CAWeightTag*>(t);

	// check weight change
	if (tag->GetDirty(DIRTYFLAGS::DATA) != lasttagdirty)
	{
		op->SetDirty(DIRTYFLAGS::DATA);
		return;
	}

	// check matrix change: sum over ALL joints first, then compare once.
	// Comparing inside the loop would test a partial sum against the stored
	// total and report a change on nearly every call.
	BaseDocument* tagDoc = tag->GetDocument();
	const Int jcount = tag->GetJointCount();
	Int cdirty = 0;
	for (Int i = 0; i < jcount; ++i)
	{
		BaseObject* joint = tag->GetJoint(i, tagDoc);
		if (joint) // a joint slot may not resolve to an object
			cdirty += joint->GetDirty(DIRTYFLAGS::MATRIX);
	}

	if (cdirty != jointdir || cachedir != linkOp->GetDirty(DIRTYFLAGS::CACHE))
		op->SetDirty(DIRTYFLAGS::DATA);
}
/// Reads parameter @p id from @p op and returns the link it holds.
/// @param[in] op        Object whose parameter is read; its document is used to resolve the link.
/// @param[in] id        Description ID of the link parameter.
/// @param[in] excepted  Forwarded to GeData::GetLink() as its second argument
///                      (presumably the required object type — confirm against the SDK).
/// @return The linked element, or nullptr when the parameter could not be read.
BaseList2D* SkinModifier::GetBaseLink(BaseObject* op, DescID id, Int excepted)
{
	GeData linkData;
	const Bool fetched = op->GetParameter(id, linkData, DESCFLAGS_GET::NONE);
	if (fetched)
		return linkData.GetLink(op->GetDocument(), excepted);
	return nullptr;
}
/// Rewrites every point of @p op as the weighted sum of its joint transformations.
///
/// For each point p (moved to global space with @p op_mg) the result is
/// sum over joints of weight * (joint->GetMg() * restState.m_bMi) * p,
/// moved back with the inverse of @p op_mg. The dirty counts this evaluation is
/// based on are stored for CheckDirty().
/// @return false when an argument or a link is missing or the scratch array
///         could not be allocated, true once the points were written.
Bool SkinModifier::ModifyObject(BaseObject* mod, BaseDocument* doc, BaseObject* op, const Matrix& op_mg, const Matrix& mod_mg, Float lod, Int32 flags, BaseThread* thread)
{
	if (!mod || !op || !doc || !thread)
		return false;
	BaseList2D* t = GetBaseLink(mod, DescID(NEWSKINTAG), Tweights);
	BaseList2D* l = GetBaseLink(mod, DescID(NEWSKINCHECK), Opolygon);
	if (!t || !l)
		return false;
	PointObject* linkOp = static_cast<PointObject*>(l);
	CAWeightTag* tag = static_cast<CAWeightTag*>(t);

	PointObject* pObj = ToPoint(op);
	const Vector* pListR = pObj->GetPointR();
	Vector* pListW = pObj->GetPointW();
	const Int pcount = pObj->GetPointCount();
	if (pcount > 0 && (!pListR || !pListW))
		return false;

	// Everything that depends only on the joint is computed once here instead
	// of once per (point, joint) pair inside the parallel worker.
	struct BoundJoint
	{
		Int index;   // joint index inside the weight tag
		Matrix skin; // current global joint matrix * rest-state m_bMi
	};
	const Int jcount = tag->GetJointCount();
	maxon::BaseArray<BoundJoint> bound;
	if (bound.Resize(jcount) == maxon::FAILED)
		return false;

	Int dirtySum = 0;
	Int boundCount = 0;
	for (Int m = 0; m < jcount; ++m)
	{
		BaseObject* joint = tag->GetJoint(m, doc);
		if (!joint) // a joint slot may not resolve to an object
			continue;
		dirtySum += joint->GetDirty(DIRTYFLAGS::MATRIX);
		const JointRestState rest = tag->GetJointRestState(m);
		bound[boundCount].index = m;
		bound[boundCount].skin = joint->GetMg() * rest.m_bMi;
		++boundCount;
	}

	// Store the state this evaluation is based on only after nothing can fail anymore.
	lasttagdirty = tag->GetDirty(DIRTYFLAGS::DATA);
	cachedir = linkOp->GetDirty(DIRTYFLAGS::CACHE);
	jointdir = dirtySum;

	const Matrix invOpMg = ~op_mg; // inverted once, not once per point

	// Each invocation reads and writes only element i, so invocations are independent.
	auto worker = [boundCount, &op_mg, &invOpMg, &bound, pListR, pListW, tag](maxon::Int i)
	{
		const Vector pos = op_mg * pListR[i];
		Vector temp;
		for (Int k = 0; k < boundCount; ++k)
		{
			const Float weight = tag->GetWeight(bound[k].index, i);
			if (weight == 0.0)
				continue;
			temp += (bound[k].skin * pos) * weight;
		}
		pListW[i] = invOpMg * temp;
	};
	maxon::ParallelFor::Dynamic(0, pcount, worker);

	// Notify Cinema about the internal data update.
	op->Message(MSG_UPDATE);
	return true;
}
/// Registers the SkinModifier object plugin with Cinema 4D.
/// @return The result of RegisterObjectPlugin().
Bool RegisterSkinModifier()
{
	const auto pluginName = "C++ oskinmodifier"_s;
	const auto descriptionName = "oskinmodifier"_s;
	return RegisterObjectPlugin(
		ID_OBJECTDATA_SKINMODIFIER,
		pluginName,
		OBJECT_MODIFIER,
		SkinModifier::Alloc,
		descriptionName,
		nullptr,
		0);
}
Cheers,
Maxime.