I finally found the source of the CPS3 issue on the RPi 2 with FBA >= 0.2.97.30: src/cpu/sh2/sh2.cpp
With the old file I'm at ~60 fps; with the new one I'm at ~45 fps. Here are the changes between the two:
@@ -57,15 +57,16 @@
#define SH2_INT_15 15
#ifndef SH2_INLINE
-#define SH2_INLINE
+#define SH2_INLINE inline
#endif
#if FAST_OP_FETCH
+ static unsigned char * readop_pr; // for FAST_OP_FETCH cpu_readop16()
- #define change_pc(newpc) \
- sh2->pc = (newpc); \
- pSh2Ext->opbase = pSh2Ext->MemMap[ (sh2->pc >> SH2_SHIFT) + SH2_WADD * 2 ]; \
- pSh2Ext->opbase -= (sh2->pc & ~SH2_PAGEM);
+ #define change_pc(newpc) \
+ sh2->pc = (newpc); \
+ readop_pr = pSh2Ext->MemMap[ (sh2->pc >> SH2_SHIFT) + SH2_WADD * 2 ]; \
+ pSh2Ext->opbase = readop_pr - (sh2->pc & ~SH2_PAGEM);
#else
@@ -606,9 +607,9 @@
#if FAST_OP_FETCH
#ifdef LSB_FIRST
-#define cpu_readop16(A) *(unsigned short *)(pSh2Ext->opbase + ((A) ^ 0x02))
+#define cpu_readop16(A) ((uintptr_t)readop_pr >= SH2_MAXHANDLER) ? *(unsigned short *)(pSh2Ext->opbase + ((A) ^ 0x02)) : pSh2Ext->ReadWord[(uintptr_t)readop_pr](A);
#else
-#define cpu_readop16(A) (*(unsigned short *)(pSh2Ext->opbase + ((A))))
+#define cpu_readop16(A) ((uintptr_t)readop_pr >= SH2_MAXHANDLER) ? *(unsigned short *)(pSh2Ext->opbase + ((A))) : pSh2Ext->ReadWord[(uintptr_t)readop_pr](A);
#endif
#else
@@ -617,13 +618,13 @@
{
unsigned char * pr;
pr = pSh2Ext->MemMap[ (A >> SH2_SHIFT) + SH2_WADD * 2 ];
- if ( (unsigned int)pr >= SH2_MAXHANDLER ) {
+ if ( (uintptr_t)pr >= SH2_MAXHANDLER ) {
#ifdef LSB_FIRST
A ^= 2;
#endif
return *((unsigned short *)(pr + (A & SH2_PAGEM)));
}
- return pSh2Ext->ReadWord[(unsigned int)pr](A);
+ return pSh2Ext->ReadWord[pr](A);
}
#endif
@@ -3320,14 +3321,8 @@
do
{
-
- if ( pSh2Ext->suspend ) {
- sh2->sh2_total_cycles += cycles;
- sh2->sh2_icount = 0;
- break;
- }
-
- UINT16 opcode;
+ if (!pSh2Ext->suspend) {
+ UINT16 opcode;
if (sh2->delay) {
//opcode = cpu_readop16(WORD_XOR_BE((UINT32)(sh2->delay & AM)));
@@ -3361,7 +3356,7 @@
case 14<<12: op1110(opcode); break;
default: op1111(opcode); break;
}
-
+ }
#endif
if(sh2->test_irq && !sh2->delay)
I could test whether ARM is defined and switch between the two versions of the code based on that. But I actually think there is something really wrong with the new code that needs to be fixed if I lose 15 fps over those few lines. Could anyone with better knowledge of C/C++ give me a hint?