<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic TSX example code doesn't work in Intel® ISA Extensions</title>
    <link>https://community.intel.com/t5/Intel-ISA-Extensions/TSX-example-code-doesn-t-work/m-p/1019413#M4992</link>
    <description>&lt;P&gt;I have intel xeon cpu E3-1230 v3 machine which has TSX.&lt;/P&gt;

&lt;P&gt;I just want to test that TSX runs well.&lt;/P&gt;

&lt;P&gt;From the manual, I got this example pseudocode:&lt;/P&gt;

&lt;PRE class="brush:cpp;"&gt;void rtm_wrapped_lock(lock) {
   if (_xbegin() == _XBEGIN_STARTED) {
      if (lock is free)
         /* add lock to the read-set */
         return; /* Execute transactionally */
      _xabort(0xff);
      /* 0xff means the lock was not free */
   }
   /* come here following the transactional abort */
   original_locking_code(lock);
}

void rtm_wrapped_unlock(lock) {
   /* If lock is free, assume that the lock was elided */
   if (lock is free)
      _xend(); /* commit */
   else
      original_unlocking_code(lock);
}&lt;/PRE&gt;

&lt;P&gt;My test code for RTM which is a set of TSX is like this.&lt;/P&gt;

&lt;PRE class="brush:cpp;"&gt;void main(void)
{
   int i;
   int sum[20];
   int data[20];

   pthread_mutex_t mutex;

   pthread_mutex_init(&amp;amp;mutex,NULL);

   for(i=0;i&amp;lt;20;i++)
   {
      data[i]=i;
      sum[i]=0;
   }

   omp_set_num_threads(4);

#pragma omp parallel for private(i)
   for(i=0;i&amp;lt;20;i++)
   {
      if(_xbegin()==_XBEGIN_STARTED)
      {
         if(pthread_mutex_trylock(&amp;amp;mutex)==0)                  //if lock is free
         {
            /* transactional execution */
            pthread_mutex_unlock(&amp;amp;mutex);
            sum[i]+=data[i];
            _xend();
         }
         else
            _xabort(0xff);
         }      
      else
      {
#pragma omp critical
         {
             pthread_mutex_lock(&amp;amp;mutex);
             sum[i]+=data[i];
             pthread_mutex_unlock(&amp;amp;mutex);
         }
      }   
   }
}&lt;/PRE&gt;

&lt;P&gt;and&lt;/P&gt;

&lt;PRE class="brush:cpp;"&gt;#define _XBEGIN_STARTED		(~0u)
#define _XABORT_EXPLICIT	(1 &amp;lt;&amp;lt; 0)
#define _XABORT_RETRY		(1 &amp;lt;&amp;lt; 1)
#define _XABORT_CONFLICT	(1 &amp;lt;&amp;lt; 2)
#define _XABORT_CAPACITY	(1 &amp;lt;&amp;lt; 3)
#define _XABORT_DEBUG		(1 &amp;lt;&amp;lt; 4)
#define _XABORT_NESTED		(1 &amp;lt;&amp;lt; 5)
#define _XABORT_CODE(x)		(((x) &amp;gt;&amp;gt; 24) &amp;amp; 0xff)

#define __rtm_force_inline __attribute__((__always_inline__)) inline

static __rtm_force_inline int _xbegin(void)
{
	int ret = _XBEGIN_STARTED;
	asm volatile(".byte 0xc7,0xf8 ; .long 0" : "+a" (ret) :: "memory");
	return ret;
}

static __rtm_force_inline void _xend(void)
{
	 asm volatile(".byte 0x0f,0x01,0xd5" ::: "memory");
}

static __rtm_force_inline void _xabort(const unsigned int status)
{
	asm volatile(".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory");
}

static __rtm_force_inline int _xtest(void)
{
	unsigned char out;
	asm volatile(".byte 0x0f,0x01,0xd6 ; setnz %0" : "=r" (out) :: "memory");
	return out;
}

#endif&lt;/PRE&gt;

&lt;P&gt;This is rtm.h code&lt;/P&gt;

&lt;P&gt;when executing this program, the source code can not enter the Xbegin status !!.&lt;/P&gt;

&lt;P&gt;All threads runs with fallback path !&lt;/P&gt;

&lt;P&gt;It means really all time lock is busy? or my test code is wrong for testing RTM?&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Tue, 24 Feb 2015 05:18:55 GMT</pubDate>
    <dc:creator>YangHun_P_</dc:creator>
    <dc:date>2015-02-24T05:18:55Z</dc:date>
    <item>
      <title>TSX example code doesn't work</title>
      <link>https://community.intel.com/t5/Intel-ISA-Extensions/TSX-example-code-doesn-t-work/m-p/1019413#M4992</link>
      <description>&lt;P&gt;I have intel xeon cpu E3-1230 v3 machine which has TSX.&lt;/P&gt;

&lt;P&gt;I just want to test that TSX runs well.&lt;/P&gt;

&lt;P&gt;From the manual, I got this example pseudocode:&lt;/P&gt;

&lt;PRE class="brush:cpp;"&gt;void rtm_wrapped_lock(lock) {
   if (_xbegin() == _XBEGIN_STARTED) {
      if (lock is free)
         /* add lock to the read-set */
         return; /* Execute transactionally */
      _xabort(0xff);
      /* 0xff means the lock was not free */
   }
   /* come here following the transactional abort */
   original_locking_code(lock);
}

void rtm_wrapped_unlock(lock) {
   /* If lock is free, assume that the lock was elided */
   if (lock is free)
      _xend(); /* commit */
   else
      original_unlocking_code(lock);
}&lt;/PRE&gt;

&lt;P&gt;My test code for RTM which is a set of TSX is like this.&lt;/P&gt;

&lt;PRE class="brush:cpp;"&gt;void main(void)
{
   int i;
   int sum[20];
   int data[20];

   pthread_mutex_t mutex;

   pthread_mutex_init(&amp;amp;mutex,NULL);

   for(i=0;i&amp;lt;20;i++)
   {
      data[i]=i;
      sum[i]=0;
   }

   omp_set_num_threads(4);

#pragma omp parallel for private(i)
   for(i=0;i&amp;lt;20;i++)
   {
      if(_xbegin()==_XBEGIN_STARTED)
      {
         if(pthread_mutex_trylock(&amp;amp;mutex)==0)                  //if lock is free
         {
            /* transactional execution */
            pthread_mutex_unlock(&amp;amp;mutex);
            sum[i]+=data[i];
            _xend();
         }
         else
            _xabort(0xff);
         }      
      else
      {
#pragma omp critical
         {
             pthread_mutex_lock(&amp;amp;mutex);
             sum[i]+=data[i];
             pthread_mutex_unlock(&amp;amp;mutex);
         }
      }   
   }
}&lt;/PRE&gt;

&lt;P&gt;and&lt;/P&gt;

&lt;PRE class="brush:cpp;"&gt;#define _XBEGIN_STARTED		(~0u)
#define _XABORT_EXPLICIT	(1 &amp;lt;&amp;lt; 0)
#define _XABORT_RETRY		(1 &amp;lt;&amp;lt; 1)
#define _XABORT_CONFLICT	(1 &amp;lt;&amp;lt; 2)
#define _XABORT_CAPACITY	(1 &amp;lt;&amp;lt; 3)
#define _XABORT_DEBUG		(1 &amp;lt;&amp;lt; 4)
#define _XABORT_NESTED		(1 &amp;lt;&amp;lt; 5)
#define _XABORT_CODE(x)		(((x) &amp;gt;&amp;gt; 24) &amp;amp; 0xff)

#define __rtm_force_inline __attribute__((__always_inline__)) inline

static __rtm_force_inline int _xbegin(void)
{
	int ret = _XBEGIN_STARTED;
	asm volatile(".byte 0xc7,0xf8 ; .long 0" : "+a" (ret) :: "memory");
	return ret;
}

static __rtm_force_inline void _xend(void)
{
	 asm volatile(".byte 0x0f,0x01,0xd5" ::: "memory");
}

static __rtm_force_inline void _xabort(const unsigned int status)
{
	asm volatile(".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory");
}

static __rtm_force_inline int _xtest(void)
{
	unsigned char out;
	asm volatile(".byte 0x0f,0x01,0xd6 ; setnz %0" : "=r" (out) :: "memory");
	return out;
}

#endif&lt;/PRE&gt;

&lt;P&gt;This is rtm.h code&lt;/P&gt;

&lt;P&gt;when executing this program, the source code can not enter the Xbegin status !!.&lt;/P&gt;

&lt;P&gt;All threads runs with fallback path !&lt;/P&gt;

&lt;P&gt;It means really all time lock is busy? or my test code is wrong for testing RTM?&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 24 Feb 2015 05:18:55 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-ISA-Extensions/TSX-example-code-doesn-t-work/m-p/1019413#M4992</guid>
      <dc:creator>YangHun_P_</dc:creator>
      <dc:date>2015-02-24T05:18:55Z</dc:date>
    </item>
    <item>
      <title>Hi,</title>
      <link>https://community.intel.com/t5/Intel-ISA-Extensions/TSX-example-code-doesn-t-work/m-p/1019414#M4993</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;

&lt;P&gt;The problem is in the method you use for checking the lock state (&lt;SPAN style="color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace; font-size: 13.0080003738403px; line-height: 14.3088006973267px; background-color: rgb(248, 248, 248);"&gt;pthread_mutex_trylock +&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN style="color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace; font-size: 13.0080003738403px; line-height: 14.3088006973267px;"&gt;pthread_mutex_unlock&lt;/SPAN&gt;). It modifies the lock state, which causes conflicts on the lock variable -&amp;gt; the transaction will most likely always abort. The method to check the lock state must not modify the lock state. Unfortunately, the standard pthread lock does not provide such a method.&lt;/P&gt;

&lt;P&gt;You should rather use one of the proven TSX/RTM lock elision implementations:&amp;nbsp;&lt;A href="http://www.intel.com/software/tsx"&gt;www.intel.com/software/tsx&lt;/A&gt; (&lt;STRONG style="box-sizing: border-box; color: rgb(102, 102, 102); font-family: Arial, Tahoma, Helvetica, sans-serif; font-size: 14px; line-height: 16.7999992370605px;"&gt;Lock implementations with Intel TSX support&lt;/STRONG&gt;).&lt;/P&gt;

&lt;P&gt;Best regards,&lt;/P&gt;

&lt;P&gt;Roman&lt;/P&gt;</description>
      <pubDate>Tue, 24 Feb 2015 07:17:38 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-ISA-Extensions/TSX-example-code-doesn-t-work/m-p/1019414#M4993</guid>
      <dc:creator>Roman_D_Intel</dc:creator>
      <dc:date>2015-02-24T07:17:38Z</dc:date>
    </item>
    <item>
      <title>Also if your glibc already</title>
      <link>https://community.intel.com/t5/Intel-ISA-Extensions/TSX-example-code-doesn-t-work/m-p/1019415#M4994</link>
      <description>&lt;P&gt;Also if your glibc already does RTM (which several modern distributions do) pthread_mutex_trylock would always force an abort, to enforce correct semantics. Otherwise trylock cannot know if a elided lock is hold or not.&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 26 Feb 2015 02:29:36 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-ISA-Extensions/TSX-example-code-doesn-t-work/m-p/1019415#M4994</guid>
      <dc:creator>Andreas_K_Intel</dc:creator>
      <dc:date>2015-02-26T02:29:36Z</dc:date>
    </item>
    <item>
      <title>Additionally in the fail to</title>
      <link>https://community.intel.com/t5/Intel-ISA-Extensions/TSX-example-code-doesn-t-work/m-p/1019416#M4995</link>
      <description>&lt;P&gt;Additionally in the fail to begin section redundantly has an omp critical and mutex lock. If you are only using omp threads then the critical is sufficient. If you are using combination of omp threads and pthreads of your own then use the mutex.&lt;/P&gt;

&lt;P&gt;I&lt;/P&gt;

&lt;PRE class="brush:cpp;"&gt;void main(void)
{
&amp;nbsp;&amp;nbsp; int i;
&amp;nbsp;&amp;nbsp; int sum[20];
&amp;nbsp;&amp;nbsp; int data[20];

&amp;nbsp;&amp;nbsp; pthread_mutex_t mutex;

&amp;nbsp;&amp;nbsp; pthread_mutex_init(&amp;amp;mutex,NULL);

&amp;nbsp;&amp;nbsp; for(i=0;i&amp;lt;20;i++)
&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; data[i]=i;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sum[i]=0;
&amp;nbsp;&amp;nbsp; }

&amp;nbsp;&amp;nbsp; omp_set_num_threads(4);

#pragma omp parallel for private(i)
&amp;nbsp;&amp;nbsp; for(i=0;i&amp;lt;20;i++)
&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if(_xbegin()==_XBEGIN_STARTED)
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; /* transactional execution */
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sum[i]+=data[i];
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; _xend();
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; else
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; {
#pragma omp critical
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sum[i]+=data[i]; // in this case an omp atomic would be better
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }&amp;nbsp;&amp;nbsp; 
&amp;nbsp;&amp;nbsp; }
}
&lt;/PRE&gt;

&lt;P&gt;In the above code, the TSX section is unwarranted due to each thread manipulating different ranges of [i].&lt;/P&gt;

&lt;PRE class="brush:cpp;"&gt;void main(void)
{
&amp;nbsp;&amp;nbsp; int i,j, nCritical;
&amp;nbsp;&amp;nbsp; int sum[20];
&amp;nbsp;&amp;nbsp; int data[20];

&amp;nbsp;&amp;nbsp; pthread_mutex_t mutex;

&amp;nbsp;&amp;nbsp; pthread_mutex_init(&amp;amp;mutex,NULL);

&amp;nbsp;&amp;nbsp; for(i=0;i&amp;lt;20;i++)
&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; data[i]=i;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sum[i]=0;
&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp; nCritical = 0;
&amp;nbsp;&amp;nbsp; omp_set_num_threads(4);
#define LOOP_CNT
#pragma omp parallel for private(i,j)
&amp;nbsp;&amp;nbsp; for(j=0;j&amp;lt;LOOP_CNT;j++)
&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; // each thread now competing for same [i]
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; for(i=0;i&amp;lt;20;i++)
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if(_xbegin()==_XBEGIN_STARTED)
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; /* transactional execution */
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sum[i]+=data[i];
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; _xend();
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; else
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; {
#pragma omp critical
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sum[i]+=data[i];
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; ++nCritical;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }&amp;nbsp;&amp;nbsp; 
&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp; printf("TSX: %d, critical: %d\n", LOOP_CNT-nCritical, nCritical);
}
&lt;/PRE&gt;

&lt;P&gt;The above would better approximate your test. However, the zone of contention is only 2 cache lines and is highly contended. Therefore, it is expected to experience a very high number of&amp;nbsp; aborted transactions. A better facsimile might be:&lt;/P&gt;

&lt;PRE class="brush:cpp;"&gt;void main(void)
{
&amp;nbsp;&amp;nbsp; int i,j, nCritical;
&amp;nbsp;&amp;nbsp; int sum[20];
&amp;nbsp;&amp;nbsp; int data[20];

&amp;nbsp;&amp;nbsp; pthread_mutex_t mutex;

&amp;nbsp;&amp;nbsp; pthread_mutex_init(&amp;amp;mutex,NULL);

&amp;nbsp;&amp;nbsp; for(i=0;i&amp;lt;20;i++)
&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; data[i]=i;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sum[i]=0;
&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp; nCritical = 0;
&amp;nbsp;&amp;nbsp; omp_set_num_threads(4);
#define LOOP_CNT
#pragma omp parallel for private(i,j)
&amp;nbsp;&amp;nbsp; for(j=0;j&amp;lt;LOOP_CNT;j++)
&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; // arbitrary do short do work
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; _mm_pause(); _mm_pause(); _mm_pause();
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; // now results of do work accumulated into sum[]
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if(_xbegin()==_XBEGIN_STARTED)
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; /* transactional execution */
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; for(i=0;i&amp;lt;20;i++)
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sum[i]+=data[i];
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; _xend();
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; else
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; {
#pragma omp critical
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; for(i=0;i&amp;lt;20;i++)
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sum[i]+=data[i];
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; ++nCritical;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }&amp;nbsp;&amp;nbsp; 
&amp;nbsp;&amp;nbsp; }
&amp;nbsp;&amp;nbsp; printf("TSX: %d, critical: %d\n", LOOP_CNT-nCritical, nCritical);
}
&lt;/PRE&gt;

&lt;P&gt;Jim Dempsey&lt;/P&gt;</description>
      <pubDate>Thu, 26 Feb 2015 13:16:29 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-ISA-Extensions/TSX-example-code-doesn-t-work/m-p/1019416#M4995</guid>
      <dc:creator>jimdempseyatthecove</dc:creator>
      <dc:date>2015-02-26T13:16:29Z</dc:date>
    </item>
    <item>
      <title>If sum[i] is potentially</title>
      <link>https://community.intel.com/t5/Intel-ISA-Extensions/TSX-example-code-doesn-t-work/m-p/1019417#M4996</link>
      <description>&lt;P&gt;If&amp;nbsp;&lt;SPAN style="color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace; font-size: 13.0080003738403px; line-height: 14.3088006973267px;"&gt;sum&lt;I&gt; is potentially updated by several threads then the usage of "&lt;/I&gt;&lt;/SPAN&gt;&lt;SPAN style="color: rgb(128, 128, 128); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace; font-size: 13.0080003738403px; line-height: 14.3088006973267px; background-color: rgb(248, 248, 248);"&gt;#pragma omp critical&lt;/SPAN&gt;&lt;SPAN style="color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace; font-size: 13.0080003738403px; line-height: 14.3088006973267px;"&gt;" is wrong (race between the TSX execution and the fall-back). You must subscribe to the fall-back lock state inside the transaction (read the lock state) or use lock-free operations in the fall-back. Please consider &lt;A href="https://software.intel.com/en-us/blogs/2013/07/25/fun-with-intel-transactional-synchronization-extensions"&gt;this blog&lt;/A&gt;&amp;nbsp;describing the general issue with the&amp;nbsp;"&lt;/SPAN&gt;&lt;SPAN style="font-size: 13.0080003738403px; color: rgb(128, 128, 128); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace; line-height: 14.3088006973267px; background-color: rgb(248, 248, 248);"&gt;#pragma omp critical&lt;/SPAN&gt;&lt;SPAN style="color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace; font-size: 13.0080003738403px; line-height: 14.3088006973267px;"&gt;" usage.&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;

&lt;P&gt;&lt;SPAN style="color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace; font-size: 13.0080003738403px; line-height: 14.3088006973267px;"&gt;Best regards,&lt;/SPAN&gt;&lt;/P&gt;

&lt;P&gt;&lt;SPAN style="color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace; font-size: 13.0080003738403px; line-height: 14.3088006973267px;"&gt;Roman&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 26 Feb 2015 14:43:04 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-ISA-Extensions/TSX-example-code-doesn-t-work/m-p/1019417#M4996</guid>
      <dc:creator>Roman_D_Intel</dc:creator>
      <dc:date>2015-02-26T14:43:04Z</dc:date>
    </item>
    <item>
      <title>In the sample code above, the</title>
      <link>https://community.intel.com/t5/Intel-ISA-Extensions/TSX-example-code-doesn-t-work/m-p/1019418#M4997</link>
      <description>&lt;P&gt;In the sample code above, the protected regions are correct. *** however, in a practical application where the protected region manipulates cache lines or the same cache line multiple times, the above code outline is incorrect. In order to be correct, as Roman indicates you would need to add a pthread_mutex_is_locked(&amp;amp;mutex) function and do something like this:&lt;/P&gt;

&lt;PRE class="brush:cpp;"&gt;void main(void)
{
&amp;nbsp;&amp;nbsp; int i;
&amp;nbsp;&amp;nbsp; int sum[20];
&amp;nbsp;&amp;nbsp; int data[20];

&amp;nbsp;&amp;nbsp; pthread_mutex_t mutex;

&amp;nbsp;&amp;nbsp; pthread_mutex_init(&amp;amp;mutex,NULL);

&amp;nbsp;&amp;nbsp; for(i=0;i&amp;lt;20;i++)
&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; data[i]=i;
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sum[i]=0;
&amp;nbsp;&amp;nbsp; }

&amp;nbsp;&amp;nbsp; omp_set_num_threads(4);

#pragma omp parallel for private(i)
&amp;nbsp;&amp;nbsp; for(i=0;i&amp;lt;20;i++)
&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; // *** hypothetical pthread_mutex_is_lock, you must add this function
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if(!pthread_mutex_is_locked(&amp;amp;mutex) &amp;amp;&amp;amp; _xbegin()==_XBEGIN_STARTED)
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if(pthread_mutex_is_locked(&amp;amp;mutex))
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; _xabort(0xff);
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; /* transactional execution */
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sum[i]+=data[i];
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; _xend();
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; else
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; {
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; pthread_mutex_lock(&amp;amp;mutex);
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sum[i]+=data[i];
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; pthread_mutex_unlock(&amp;amp;mutex);
&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }&amp;nbsp;&amp;nbsp; 
&amp;nbsp;&amp;nbsp; }
}
&lt;/PRE&gt;

&lt;P&gt;The pthread_mutex_is_locked function can be an inline test of the mutex being not in free state. (you can extend this if you wish).&lt;/P&gt;

&lt;P&gt;Read the pthread.h and its include files to determine how to create a generic test function. This should have been part of the standard mutex functions.&lt;/P&gt;

&lt;P&gt;Jim Dempsey&lt;/P&gt;</description>
      <pubDate>Thu, 26 Feb 2015 16:11:40 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-ISA-Extensions/TSX-example-code-doesn-t-work/m-p/1019418#M4997</guid>
      <dc:creator>jimdempseyatthecove</dc:creator>
      <dc:date>2015-02-26T16:11:40Z</dc:date>
    </item>
  </channel>
</rss>

