6xxx strlen and 9xxx conclusions.
authorRichard W.M. Jones <rjones@redhat.com>
Wed, 14 Oct 2020 15:12:57 +0000 (16:12 +0100)
committerRichard W.M. Jones <rjones@redhat.com>
Wed, 14 Oct 2020 15:12:57 +0000 (16:12 +0100)
2020-frama-c/6000-strverscmp.html [new file with mode: 0644]
2020-frama-c/6100-strlen-1.html [new file with mode: 0644]
2020-frama-c/6200-strlen-2.html [new file with mode: 0644]
2020-frama-c/6300-strlen.html [new file with mode: 0644]
2020-frama-c/6400-strlen.html [new file with mode: 0644]
2020-frama-c/6500-strlen.term [new file with mode: 0755]
2020-frama-c/9000-conclusions-1.html [new file with mode: 0644]
2020-frama-c/9100-conclusions-2.html [new file with mode: 0644]
2020-frama-c/9200-alternatives.html [new file with mode: 0644]
2020-frama-c/9300-resources.html [new file with mode: 0644]

diff --git a/2020-frama-c/6000-strverscmp.html b/2020-frama-c/6000-strverscmp.html
new file mode 100644 (file)
index 0000000..132ae68
--- /dev/null
@@ -0,0 +1,78 @@
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+<link rel="stylesheet" href="style.css" type="text/css"/>
+<script src="code.js" type="text/javascript"></script>
+
+<h1>glibc strverscmp</h1>
+
+<pre class="code">
+<span class="comment">/* Compare S1 and S2 as strings holding indices/version numbers,
+   returning less than, equal to or greater than zero if S1 is less than,
+   equal to or greater than S2 (for more info, see the texinfo doc).
+*/</span>
+
+int
+__strverscmp (const char *s1, const char *s2)
+{
+  const unsigned char *p1 = (const unsigned char *) s1;
+  const unsigned char *p2 = (const unsigned char *) s2;
+
+  /* Symbol(s)    0       [1-9]   others
+     Transition   (10) 0  (01) d  (00) x   */
+  static const uint8_t next_state[] =
+  {
+      /* state    x    d    0  */
+      /* S_N */  S_N, S_I, S_Z,
+      /* S_I */  S_N, S_I, S_I,
+      /* S_F */  S_N, S_F, S_F,
+      /* S_Z */  S_N, S_F, S_Z
+  };
+
+  static const int8_t result_type[] =
+  {
+      /* state   x/x  x/d  x/0  d/x  d/d  d/0  0/x  0/d  0/0  */
+
+      /* S_N */  CMP, CMP, CMP, CMP, LEN, CMP, CMP, CMP, CMP,
+      /* S_I */  CMP, -1,  -1,  +1,  LEN, LEN, +1,  LEN, LEN,
+      /* S_F */  CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP,
+      /* S_Z */  CMP, +1,  +1,  -1,  CMP, CMP, -1,  CMP, CMP
+  };
+
+  if (p1 == p2)
+    return 0;
+
+  unsigned char c1 = *p1++;
+  unsigned char c2 = *p2++;
+  /* Hint: '0' is a digit too.  */
+  int state = S_N + ((c1 == '0') + (__my_isdigit (c1) != 0));
+
+  int diff;
+  while ((diff = c1 - c2) == 0)
+    {
+      if (c1 == '\0')
+       return diff;
+
+      state = next_state[state];
+      c1 = *p1++;
+      c2 = *p2++;
+      state += (c1 == '0') + (__my_isdigit (c1) != 0);
+    }
+
+  state = result_type[state * 3 + (((c2 == '0') + (__my_isdigit (c2) != 0)))];
+
+  switch (state)
+  {
+    case CMP:
+      return diff;
+
+    case LEN:
+      while (__my_isdigit (*p1++))
+       if (!__my_isdigit (*p2++))
+         return 1;
+
+      return __my_isdigit (*p2) ? -1 : diff;
+
+    default:
+      return state;
+  }
+}
+</pre>
diff --git a/2020-frama-c/6100-strlen-1.html b/2020-frama-c/6100-strlen-1.html
new file mode 100644 (file)
index 0000000..c1e5d08
--- /dev/null
@@ -0,0 +1,20 @@
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+<link rel="stylesheet" href="style.css" type="text/css"/>
+<script src="code.js" type="text/javascript"></script>
+
+<h1>glibc strlen (part 1)</h1>
+
+<pre class="code">
+size_t
+test_strlen (const char *str)
+{
+  const char *char_ptr;
+
+  <span class="comment">/* Handle the first few characters by reading one character at a time.
+     Do this until CHAR_PTR is aligned on a longword boundary.  */</span>
+  for (char_ptr = str; ((unsigned long int) char_ptr
+                       & (sizeof (longword) - 1)) != 0;
+       ++char_ptr)
+    if (*char_ptr == '\0')
+      return char_ptr - str;
+</pre>
diff --git a/2020-frama-c/6200-strlen-2.html b/2020-frama-c/6200-strlen-2.html
new file mode 100644 (file)
index 0000000..5e27410
--- /dev/null
@@ -0,0 +1,51 @@
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+<link rel="stylesheet" href="style.css" type="text/css"/>
+<script src="code.js" type="text/javascript"></script>
+
+<h1>glibc strlen (part 2)</h1>
+
+<pre class="code">
+  const unsigned long int *longword_ptr;
+  unsigned long int longword, himagic, lomagic;
+
+  longword_ptr = (unsigned long int *) char_ptr;
+
+  himagic = 0x80808080L;
+  lomagic = 0x01010101L;
+  himagic = ((himagic << 16) << 16) | himagic;
+  lomagic = ((lomagic << 16) << 16) | lomagic;
+
+  for (;;)
+    {
+      longword = *longword_ptr++;
+
+      if (((longword - lomagic) & ~longword & himagic) != 0)
+       {
+         <span class="comment">/* Which of the bytes was the zero?  If none of them were, it was
+            a misfire; continue the search.  */</span>
+
+         const char *cp = (const char *) (longword_ptr - 1);
+
+         if (cp[0] == 0)
+           return cp - str;
+         if (cp[1] == 0)
+           return cp - str + 1;
+         if (cp[2] == 0)
+           return cp - str + 2;
+         if (cp[3] == 0)
+           return cp - str + 3;
+         if (sizeof (longword) > 4)
+           {
+             if (cp[4] == 0)
+               return cp - str + 4;
+             if (cp[5] == 0)
+               return cp - str + 5;
+             if (cp[6] == 0)
+               return cp - str + 6;
+             if (cp[7] == 0)
+               return cp - str + 7;
+           }
+       }
+    }
+}
+</pre>
diff --git a/2020-frama-c/6300-strlen.html b/2020-frama-c/6300-strlen.html
new file mode 100644 (file)
index 0000000..73e8964
--- /dev/null
@@ -0,0 +1,17 @@
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+<link rel="stylesheet" href="style.css" type="text/css"/>
+<script src="code.js" type="text/javascript"></script>
+
+<h1>glibc strlen specification</h1>
+
+<pre class="code">
+  /*@
+    requires valid_read_string (str);
+    assigns \nothing;
+    ensures (\forall integer i; 0 <= i < \result ==> str[i] != '\0') &&
+            str[\result] == '\0';
+   */
+  size_t strlen (const char *str)
+  {
+    ...
+</pre>
diff --git a/2020-frama-c/6400-strlen.html b/2020-frama-c/6400-strlen.html
new file mode 100644 (file)
index 0000000..962f06b
--- /dev/null
@@ -0,0 +1,33 @@
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+<link rel="stylesheet" href="style.css" type="text/css"/>
+<script src="code.js" type="text/javascript"></script>
+
+<h1>strlen proven, but not by me</h1>
+
+<pre class="code">
+size_t strlen(const char *s)
+{
+       const char *sc;
+       /*@ loop invariant s <= sc <= s + strlen(s);
+           loop invariant valid_str(sc);
+           loop invariant strlen(s) == strlen(sc) + (sc - s);
+           loop assigns sc;
+           loop variant strlen(s) - (sc - s);
+        */
+       for (sc = s; *sc != '\0'; ++sc)
+               /* nothing */;
+       return sc - s;
+}
+</pre>
+
+<p>
+Where is the <code>/*@</code> comment for this function?
+We’ll come to that in a minute ...
+</p>
+
+
+<p class="attribution">
+Proof by Denis Efremov, Ivannikov Institute for System Programming
+at the Russian Academy of Sciences,
+<a href="https://github.com/evdenis/verker/blob/master/src/strlen.c">https://github.com/evdenis/verker/blob/master/src/strlen.c</a>
+</p>
diff --git a/2020-frama-c/6500-strlen.term b/2020-frama-c/6500-strlen.term
new file mode 100755 (executable)
index 0000000..4cdf906
--- /dev/null
@@ -0,0 +1,14 @@
+#!/bin/bash -
+
+source functions
+
+# Title.
+export title="last_char()"
+
+# History.
+remember 'cat snippets/last_char.c'
+remember 'frama-c -wp -wp-rte snippets/last_char.c'
+remember 'less /usr/share/frama-c/libc/string.h'
+remember 'less /usr/share/frama-c/libc/__fc_string_axiomatic.h'
+
+terminal
diff --git a/2020-frama-c/9000-conclusions-1.html b/2020-frama-c/9000-conclusions-1.html
new file mode 100644 (file)
index 0000000..4426eb0
--- /dev/null
@@ -0,0 +1,17 @@
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+<link rel="stylesheet" href="style.css" type="text/css"/>
+<script src="code.js" type="text/javascript"></script>
+
+<h1>In conclusion (part 1)</h1>
+
+<ul>
+<li> Frama-C is a real open source tool.
+
+<li> Not as hard to use as I imagined.
+
+<li> Mostly useful for small functions.
+
+<li> Forces you to think very hard about assumptions and corner cases.
+
+<li> Very well documented, questions answered quickly on stackoverflow.
+</ul>
diff --git a/2020-frama-c/9100-conclusions-2.html b/2020-frama-c/9100-conclusions-2.html
new file mode 100644 (file)
index 0000000..c5ea0b3
--- /dev/null
@@ -0,0 +1,17 @@
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+<link rel="stylesheet" href="style.css" type="text/css"/>
+<script src="code.js" type="text/javascript"></script>
+
+<h1>In conclusion (part 2)</h1>
+
+<ul>
+<li> Typed Memory Model turns out to be restrictive.
+
+<li> No support for malloc.
+
+<li> Not good at bitwise ops (can Z3 help?).
+
+<li> Cannot parse some non-standard <code>__attribute__</code>s (glib).
+
+<li> Lacks some <code>__builtin_*</code> functions.
+</ul>
diff --git a/2020-frama-c/9200-alternatives.html b/2020-frama-c/9200-alternatives.html
new file mode 100644 (file)
index 0000000..2469692
--- /dev/null
@@ -0,0 +1,17 @@
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+<link rel="stylesheet" href="style.css" type="text/css"/>
+<script src="code.js" type="text/javascript"></script>
+
+<h1>Alternatives and related programs</h1>
+
+<ul>
+<li> <a href="https://www.adacore.com/sparkpro">Ada SPARK Pro</a>
+  is a formally verified subset of the Ada programming language
+  (tagline: <i>"You simply can't write better code"</i>).
+
+<li> <a href="http://klee.github.io/">KLEE</a> has similarities
+  but is not a formal proof system.
+
+<li> <a href="http://compcert.inria.fr/">CompCert</a>
+  is INRIA's verified C compiler (not open source).
+</ul>
diff --git a/2020-frama-c/9300-resources.html b/2020-frama-c/9300-resources.html
new file mode 100644 (file)
index 0000000..dcbd13e
--- /dev/null
@@ -0,0 +1,30 @@
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+<link rel="stylesheet" href="style.css" type="text/css"/>
+<script src="code.js" type="text/javascript"></script>
+
+<h1>Resources</h1>
+
+<ul>
+<li> Tutorial that I followed in October which I thought was a good
+  introduction: <br>
+  <a href="https://maniagnosis.crsr.net/2017/06/AFL-brute-force-search.html">https://maniagnosis.crsr.net/2017/06/AFL-brute-force-search.html</a> <br>
+  <a href="https://maniagnosis.crsr.net/2017/06/AFL-bug-in-quicksearch.html">https://maniagnosis.crsr.net/2017/06/AFL-bug-in-quicksearch.html</a> <br>
+  <a href="https://maniagnosis.crsr.net/2017/07/AFL-correctness-of-quicksearch.html">https://maniagnosis.crsr.net/2017/07/AFL-correctness-of-quicksearch.html</a>
+
+<li> Frama-C on Stack Overflow:
+  <a href="https://stackoverflow.com/questions/tagged/frama-c">https://stackoverflow.com/questions/tagged/frama-c</a>
+
+<li> Allan Blanchard's tutorial:
+  <a href="https://allan-blanchard.fr/publis/frama-c-wp-tutorial-en.pdf">https://allan-blanchard.fr/publis/frama-c-wp-tutorial-en.pdf</a>
+
+<li> David Mentré's introduction:
+  <a href="https://www.cs.umd.edu/class/spring2016/cmsc838G/frama-c/introduction-to-frama-c_v2.pdf">https://www.cs.umd.edu/class/spring2016/cmsc838G/frama-c/introduction-to-frama-c_v2.pdf</a>
+
+<li> Upstream documentation: <br>
+  Manual: <a href="https://frama-c.com/download/frama-c-user-manual.pdf">https://frama-c.com/download/frama-c-user-manual.pdf</a> <br>
+  WP manual: <a href="https://frama-c.com/download/frama-c-wp-manual.pdf">https://frama-c.com/download/frama-c-wp-manual.pdf</a> <br>
+  ACSL: <a href="https://frama-c.com/acsl.html">https://frama-c.com/acsl.html</a>
+
+<li> ACSL specifications for various string functions:
+  <a href="https://github.com/evdenis/verker">https://github.com/evdenis/verker</a>
+</ul>