A funky grammar

You are given the following context-free grammar that produces a language L:

S -> | SS | aSaSb | aSbSa | bSaSa

It is easy to see that this grammar produces exactly the language M of all words over a, b that contains twice as many a's as b's. But is it easy to prove?

We claim that L ⊆ M is while proving M ⊆ L is not, so we give you some guidance: Assume w ∈ M. We prove w ∈ L by induction on the length of w.

For the induction step, consider w ∈ M with positive length and assume that the claim holds for all w′ of shorter length (IH).

Consider the function h(w):=2 * |w|_b − |w|_a. We have h(w)=0 iff w ∈ M. If h is zero for any prefix of w, then it is easy to establish w ∈ L with (IH). So assume the contrary for the remainder of the proof.

Now consider three cases:

w = bu. Thus h(u)= − 2. As the value of h for w does not have a zero, starts with b, and can only decrease by 1, we have u = vaa for some v. This means h(v)=0. Use the last production to show that the word is in the grammar.
w = ub. Analogous.
w = aua. As w does not have a zero, we know that h(v) never takes the value 1 for any prefix of u. With h(u)=2, we can conclude that u = v₁bv₂ for v₁, v₂ with h(v₁)=h(v₂)=0. Now use the fourth production to show w ∈ L.

You are given four tasks:

Show L ⊆ M
Prove a theorem that helps you to establish the essential step in (a), (b).
Prove a theorem that helps you to establish the essential step in (c).
Show M ⊆ L. You may want to exploit the symmetry of (a) and (b).

Resources

Download Files

Definitions File

theory Defs
  imports Main
begin

section ‹Grammars and Languages›

text ‹‹#w⇘c⇙› denotes the number of times the character ‹c› occurs in word ‹w››
abbreviation count :: "'a list ⇒ 'a ⇒ nat" ("#_⇘_⇙") where
  "count xs x ≡ count_list xs x"

abbreviation
  "a ≡ CHR ''a''"

abbreviation
  "b ≡ CHR ''b''"

definition prefix :: "'a list ⇒ 'a list ⇒ bool" (infix "≺" 50) where
  "prefix xs ys ≡ ∃ zs. ys = xs @ zs ∧ zs ≠ []"


section ‹Problem statement›

text ‹Consider the following grammar:›
inductive_set L :: "string set" where
  "[] ∈ L"
| "x @ y ∈ L" if "x ∈ L" "y ∈ L"
| "a # x @ a # y @ [b] ∈ L" if "x ∈ L" "y ∈ L"
| "a # x @ b # y @ [a] ∈ L" if "x ∈ L" "y ∈ L"
| "b # x @ a # y @ [a] ∈ L" if "x ∈ L" "y ∈ L"
text ‹Q: Can we delete any of these rules?›

definition
  "M = {w. set w ⊆ {a, b} ∧ #w⇘a⇙ = 2 * #w⇘b⇙}"

text ‹
  We want to prove that the grammar exactly produces the language of all words that contain
  twice as many a's than b's:›
lemma
  "L = M"
  oops

text ‹The height function›
definition
  "h w = 2 * int (#w⇘b⇙) - int (#w⇘a⇙)"

end

Template File

theory Submission
  imports Defs
begin

text ‹First task›
theorem L_sub: "L ⊆ M"
  sorry

text ‹Second task›
theorem h_first_zero:
  assumes "h (xs @ ys) < 0" "h xs ≥ 0"
  shows "∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
  sorry

text ‹Third task›
theorem h_2_split:
  assumes "h w = 2" "∀v. v ≺ w ⟶ h v ≠ 1"
  obtains x y where "h x = 0" "h y = 0" "w = x @ b # y"
  sorry

text ‹Fourth task›
theorem L_sup:
  assumes h_first_zero:
    "⋀xs ys. h (xs @ ys) < 0 ⟹ 0 ≤ h xs ⟹ ∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
  assumes h_2_split:
    "⋀t w. h w = 2 ⟹ ∀v. v ≺ w ⟶ h v ≠ 1 ⟹ (⋀x y. h x = 0 ⟹ h y = 0 ⟹ w = x @ b # y ⟹ t)
      ⟹ t"
  assumes "h w = 0" "set w ⊆ {a, b}"
  shows "w ∈ L"
  sorry

text ‹The final theorem›
corollary
  "L = M"
  using L_sub L_sup[OF h_first_zero h_2_split] unfolding h_def M_def by auto

end

Check File

theory Check
  imports Submission
begin

text ‹First task›
theorem L_sub: "L ⊆ M"
  by (rule Submission.L_sub)

text ‹Second task›
theorem h_first_zero:
  assumes "h (xs @ ys) < 0" "h xs ≥ 0"
  shows "∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
  using assms by (rule Submission.h_first_zero)

text ‹Third task›
theorem h_2_split:
  assumes "h w = 2" "∀v. v ≺ w ⟶ h v ≠ 1"
  obtains x y where "h x = 0" "h y = 0" "w = x @ b # y"
  using assms by (rule Submission.h_2_split)

text ‹Fourth task›
theorem L_sup:
  assumes h_first_zero:
    "⋀xs ys. h (xs @ ys) < 0 ⟹ 0 ≤ h xs ⟹ ∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
  assumes h_2_split:
    "⋀t w. h w = 2 ⟹ ∀v. v ≺ w ⟶ h v ≠ 1 ⟹ (⋀x y. h x = 0 ⟹ h y = 0 ⟹ w = x @ b # y ⟹ t)
      ⟹ t"
  assumes "h w = 0" "set w ⊆ {a, b}"
  shows "w ∈ L"
  using assms by (rule Submission.L_sup)

text ‹The final theorem›
corollary
  "L = M"
  using L_sub L_sup[OF h_first_zero h_2_split] unfolding h_def M_def by auto

end

Download Files

Definitions File

Require Export Ascii List.
Require Export Arith ZArith Lia.
Require Import ssreflect.
Export ListNotations.
Open Scope char_scope.

(** In this challenge, we assume classical logic. *)
Require Export Classical.

(** A simple formalization of (possibly infinite) sets as predicates. *)
Notation set A := (A -> Prop) (only parsing).

Definition Incl {U} (A B: set U) : Prop := forall x, A x -> B x.
Definition Union {U} (A B: set U): set U := fun x => A x \/ B x.
Definition Single {U} a : set U := fun x => x = a.
Definition Pair {U} a b : set U := fun x => x = a \/ x = b.
Definition Empty {U} := fun (x: U) => False.

Notation "A  ⊆  B" := (Incl A B) (at level 70).
Notation "A  ∪  B" := (Union A B) (at level 50).
Notation "'{ x }" := (Single x) (at level 1).
Notation "∅" := Empty.

Axiom set_ext : forall U (A B: set U), (forall x, A x <-> B x) -> A = B.

(* Simple goals involving sets can be solved by the [firstorder] tactic.
   This should be enough for our needs here. *)
Goal forall U (A B: set U) a b,
  '{a} ∪ ∅ ∪ B ∪ (A ∪ '{b} ∪ B) ⊆ Pair a b ∪ A ∪ B.
Proof. firstorder. (* boom. *) Qed.

(** Definitions on words (as lists of characters). *)

Notation word := (list ascii).

(* [#{c} w] denotes the number of times the character [c] occurs in the word [w]. *)
Notation "'#' '{' c '}'" := (fun w => count_occ ascii_dec w c) (at level 1).

Eval compute in #{"a"} ["a"; "b"; "c"]. (* there is one "a" in "abc" *)

Definition a := "a".
Definition b := "b".

(* The strict prefix relation *)
Definition prefix {A} (xs ys: list A) :=
  exists zs, ys = xs ++ zs /\ zs <> [].

Notation "xs  ≺  ys" := (prefix xs ys) (at level 50).

(* The set of letters in a word *)
Definition letters (w: word): set ascii := fun c => List.In c w.

Lemma letters_nil : letters [] = ∅.
Proof. reflexivity. Qed.
Lemma letters_cons x xs : letters (x :: xs) = '{x} ∪ letters xs.
Proof. rewrite /letters /Union /Single //=. apply set_ext. firstorder. Qed.
Lemma letters_app xs ys : letters (xs ++ ys) = letters xs ∪ letters ys.
Proof. rewrite /letters. apply set_ext. intro. rewrite in_app_iff //=. Qed.

(* Can be used by ssreflect's rewrite to rewrite with any of the lemmas in the
   tuple: use "rewrite lettersE" *)
Definition lettersE := (letters_nil, letters_cons, letters_app).

(** Problem statement *)

(* Consider the following grammar, which inductively defines a set of words: *)
Inductive L : set word :=
  | L_nil : L []
  | L_app : forall x y, L x -> L y ->
      L (x ++ y)
  | L_aab : forall x y, L x -> L y ->
      L (a :: x ++ a :: y ++ [b])
  | L_aba : forall x y, L x -> L y ->
      L (a :: x ++ b :: y ++ [a])
  | L_baa : forall x y, L x -> L y ->
      L (b :: x ++ a :: y ++ [a]).

(* We want to prove that the grammar exactly produces the language of all words
   that contain twice as many a's than b's: *)

Definition M : set word :=
  fun w => letters w ⊆ Pair a b /\ #{a} w = 2 * #{b} w.

Goal L = M.
Abort.

(* The height function. w is in M iff h(w) = 0. *)
Definition h (w: word): Z :=
  2 * Z.of_nat (#{b} w) - Z.of_nat (#{a} w).

Template File

Require Import Defs.

(* It is not required, but it is a good idea to import ssreflect to benefit
   from the better rewrite tactic. (by uncommenting the two lines below)

   ssreflect's rewrite cheatsheet: a rewrite invocation is of the form [rewrite foo bar baz],
   where each of "foo", "bar", "baz" can be:
   - "foo" where foo is a lemma: rewrites with the lemma ("-foo" rewrites in the other direction)
   - "!foo" where foo is a lemma: rewrites repeatedly with foo
   - "/foo" where foo is a definition: unfolds the definition ("-/foo" folds the definition)
   - "//": try to prove the goal or side-condition if it is trivial
   - "//=": like "//" but also simplify the goal (using "simp")
   - "(_: foo = bar)": ask the user to prove "foo = bar" as a subgoal, and rewrite with it
*)
(*
Require Import ssreflect.
Local Ltac done ::= first [ solve [ trivial | eauto | lia ] ].
*)

(* Prevent simpl/cbn from unfolding the multiplication, which is never a good
   idea. *)
Arguments Nat.mul : simpl never.
Arguments Z.mul : simpl never.


(** First task *)

Theorem L_sub : L ⊆ M.
Proof.
  (* todo *)
Admitted.


(** Second task *)

Theorem h_first_zero xs ys :
  (h (xs ++ ys) < 0)%Z ->
  (0 <= h xs)%Z ->
  exists zs ws, ys = zs ++ ws /\ h (xs ++ zs) = 0%Z.
Proof.
  (* todo *)
Admitted.


(** Third task *)

(* Hint: this is useful to reason by well-founded induction on words. *)
Definition word_len_lt := ltof _ (@length ascii).
Lemma word_len_wf : well_founded word_len_lt.
Proof. apply well_founded_ltof. Qed.

Definition smallest_word_st (P: word -> Prop) (w: word) :=
  P w /\ forall v, P v -> length w <= length v.

(* If [P] holds on some word [w], then we can consider "the" smallest word that
   satisfies [P]... *)
Lemma ex_smallest_word_st : forall (P: set word) (w: word),
  P w -> exists v, smallest_word_st P v.
Proof.
  (* todo *)
Admitted.

Theorem h_2_split w :
  h w = 2%Z ->
  (forall v, v ≺ w -> h v <> 1%Z) ->
  exists x y, h x = 0%Z /\ h y = 0%Z /\ w = x ++ [b] ++ y.
Proof.
  (* todo *)
Admitted.


(** Fourth task *)

Section L_sup.

Hypothesis h_first_zero : forall xs ys,
  (h (xs ++ ys) < 0)%Z ->
  (0 <= h xs)%Z ->
  exists zs ws, ys = zs ++ ws /\ h (xs ++ zs) = 0%Z.

Hypothesis h_2_split : forall w,
  h w = 2%Z ->
  (forall v, v ≺ w -> h v <> 1%Z) ->
  exists x y, h x = 0%Z /\ h y = 0%Z /\ w = x ++ [b] ++ y.

Theorem L_sup w :
  h w = 0%Z ->
  letters w ⊆ Pair a b ->
  L w.
Proof.
  (* todo *)
Admitted.

End L_sup.

(** The final theorem *)

Theorem final : L = M.
Proof.
  (* todo *)
Admitted.

Download Files

Definitions File

theory Defs
  imports Main
begin

section ‹Grammars and Languages›

text ‹‹#w⇘c⇙› denotes the number of times the character ‹c› occurs in word ‹w››
abbreviation count :: "'a list ⇒ 'a ⇒ nat" ("#_⇘_⇙") where
  "count xs x ≡ count_list xs x"

abbreviation
  "a ≡ CHR ''a''"

abbreviation
  "b ≡ CHR ''b''"

definition prefix :: "'a list ⇒ 'a list ⇒ bool" (infix "≺" 50) where
  "prefix xs ys ≡ ∃ zs. ys = xs @ zs ∧ zs ≠ []"


section ‹Problem statement›

text ‹Consider the following grammar:›
inductive_set L :: "string set" where
  "[] ∈ L"
| "x @ y ∈ L" if "x ∈ L" "y ∈ L"
| "a # x @ a # y @ [b] ∈ L" if "x ∈ L" "y ∈ L"
| "a # x @ b # y @ [a] ∈ L" if "x ∈ L" "y ∈ L"
| "b # x @ a # y @ [a] ∈ L" if "x ∈ L" "y ∈ L"
text ‹Q: Can we delete any of these rules?›

definition
  "M = {w. set w ⊆ {a, b} ∧ #w⇘a⇙ = 2 * #w⇘b⇙}"

text ‹
  We want to prove that the grammar exactly produces the language of all words that contain
  twice as many a's than b's:›
lemma
  "L = M"
  oops

text ‹The height function›
definition
  "h w = 2 * int (#w⇘b⇙) - int (#w⇘a⇙)"

end

Template File

theory Submission
  imports Defs
begin

text ‹First task›
theorem L_sub: "L ⊆ M"
  sorry

text ‹Second task›
theorem h_first_zero:
  assumes "h (xs @ ys) < 0" "h xs ≥ 0"
  shows "∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
  sorry

text ‹Third task›
theorem h_2_split:
  assumes "h w = 2" "∀v. v ≺ w ⟶ h v ≠ 1"
  obtains x y where "h x = 0" "h y = 0" "w = x @ b # y"
  sorry

text ‹Fourth task›
theorem L_sup:
  assumes h_first_zero:
    "⋀xs ys. h (xs @ ys) < 0 ⟹ 0 ≤ h xs ⟹ ∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
  assumes h_2_split:
    "⋀t w. h w = 2 ⟹ ∀v. v ≺ w ⟶ h v ≠ 1 ⟹ (⋀x y. h x = 0 ⟹ h y = 0 ⟹ w = x @ b # y ⟹ t)
      ⟹ t"
  assumes "h w = 0" "set w ⊆ {a, b}"
  shows "w ∈ L"
  sorry

text ‹The final theorem›
corollary
  "L = M"
  using L_sub L_sup[OF h_first_zero h_2_split] unfolding h_def M_def by auto

end

Check File

theory Check
  imports Submission
begin

text ‹First task›
theorem L_sub: "L ⊆ M"
  by (rule Submission.L_sub)

text ‹Second task›
theorem h_first_zero:
  assumes "h (xs @ ys) < 0" "h xs ≥ 0"
  shows "∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
  using assms by (rule Submission.h_first_zero)

text ‹Third task›
theorem h_2_split:
  assumes "h w = 2" "∀v. v ≺ w ⟶ h v ≠ 1"
  obtains x y where "h x = 0" "h y = 0" "w = x @ b # y"
  using assms by (rule Submission.h_2_split)

text ‹Fourth task›
theorem L_sup:
  assumes h_first_zero:
    "⋀xs ys. h (xs @ ys) < 0 ⟹ 0 ≤ h xs ⟹ ∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
  assumes h_2_split:
    "⋀t w. h w = 2 ⟹ ∀v. v ≺ w ⟶ h v ≠ 1 ⟹ (⋀x y. h x = 0 ⟹ h y = 0 ⟹ w = x @ b # y ⟹ t)
      ⟹ t"
  assumes "h w = 0" "set w ⊆ {a, b}"
  shows "w ∈ L"
  using assms by (rule Submission.L_sup)

text ‹The final theorem›
corollary
  "L = M"
  using L_sub L_sup[OF h_first_zero h_2_split] unfolding h_def M_def by auto

end

Download Files

Definitions File

(in-package "ACL2")

(defun ab-listp (lst)
  (if (endp lst) t
      (and (member (car lst) '(a b))
           (ab-listp (cdr lst)))))
           
(defun M (lst)
  (and (ab-listp lst)
       (equal (* 2 (count 'b lst))
              (count 'a lst))))

(defun zip-append (lst)
  (if (endp lst) nil
      (cons (append (caar lst) (cdar lst)) (zip-append (cdr lst)))
  )
)

(defun zip-3 (l1 l2 l3 lst)
  (if (endp lst) nil
      (cons (append (LIST l1) (caar lst) (LIST l2) (cdar lst) (LIST l3))
                    (zip-3 l1 l2 l3 (cdr lst)))
  )
)

(defun cons-all (hd l)
  (if (endp l) nil
      (cons (cons hd (car l)) (cons-all hd (cdr l)))
  )
)

(defun cart-prod (l1 l2)
  (if (endp l1) nil
      (append (cons-all (car l1) l2)
              (cart-prod (cdr l1) l2))
  )
)

(mutual-recursion
  ; generate words of length n
  (defun gen-L (n)
    (declare (xargs :measure (acl2-count (+ (* n 2) 1))))
    (if (zp n) '(nil)
        (append (zip-append (combine-L (1- n) 1))
                (zip-3 'a 'a 'b (combine-L (- n 3) 0))
                (zip-3 'a 'b 'a (combine-L (- n 3) 0))
                (zip-3 'b 'a 'a (combine-L (- n 3) 0)))
    ))
  (defun combine-L (n m)
    (declare (xargs :measure (acl2-count (+ (* n 2) 2))))
    (if (OR (not (natp n)) (not (natp m)) (> m n)) nil
        (append (cart-prod (gen-L n) (gen-L m))
                (cart-prod (gen-L m) (gen-L n))
                (combine-L (1- n) (1+ m)))
    ))
)

Template File

(in-package "ACL2")

(include-book "Defs")

(defthm length-L
  (equal (member w (gen-L (len w)))
         (M w)))

Check File

; The four lines just below are boilerplate, that is, the same for every
; problem.

(in-package "ACL2")
(include-book "Submission")
(set-enforce-redundancy t)
(include-book "Defs")

; The events below represent the theorem to be proved, and are copied from
; template.lisp.

(defthm length-L
  (equal (member w (gen-L (len w)))
         (M w)))

Download Files

Definitions File

theory Defs
  imports Main
begin

section ‹Grammars and Languages›

text ‹‹#w⇘c⇙› denotes the number of times the character ‹c› occurs in word ‹w››
abbreviation count :: "'a list ⇒ 'a ⇒ nat" ("#_⇘_⇙") where
  "count xs x ≡ count_list xs x"

abbreviation
  "a ≡ CHR ''a''"

abbreviation
  "b ≡ CHR ''b''"

definition prefix :: "'a list ⇒ 'a list ⇒ bool" (infix "≺" 50) where
  "prefix xs ys ≡ ∃ zs. ys = xs @ zs ∧ zs ≠ []"


section ‹Problem statement›

text ‹Consider the following grammar:›
inductive_set L :: "string set" where
  "[] ∈ L"
| "x @ y ∈ L" if "x ∈ L" "y ∈ L"
| "a # x @ a # y @ [b] ∈ L" if "x ∈ L" "y ∈ L"
| "a # x @ b # y @ [a] ∈ L" if "x ∈ L" "y ∈ L"
| "b # x @ a # y @ [a] ∈ L" if "x ∈ L" "y ∈ L"
text ‹Q: Can we delete any of these rules?›

definition
  "M = {w. set w ⊆ {a, b} ∧ #w⇘a⇙ = 2 * #w⇘b⇙}"

text ‹
  We want to prove that the grammar exactly produces the language of all words that contain
  twice as many a's than b's:›
lemma
  "L = M"
  oops

text ‹The height function›
definition
  "h w = 2 * int (#w⇘b⇙) - int (#w⇘a⇙)"

end

Template File

theory Submission
  imports Defs
begin

text ‹First task›
theorem L_sub: "L ⊆ M"
  sorry

text ‹Second task›
theorem h_first_zero:
  assumes "h (xs @ ys) < 0" "h xs ≥ 0"
  shows "∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
  sorry

text ‹Third task›
theorem h_2_split:
  assumes "h w = 2" "∀v. v ≺ w ⟶ h v ≠ 1"
  obtains x y where "h x = 0" "h y = 0" "w = x @ b # y"
  sorry

text ‹Fourth task›
theorem L_sup:
  assumes h_first_zero:
    "⋀xs ys. h (xs @ ys) < 0 ⟹ 0 ≤ h xs ⟹ ∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
  assumes h_2_split:
    "⋀t w. h w = 2 ⟹ ∀v. v ≺ w ⟶ h v ≠ 1 ⟹ (⋀x y. h x = 0 ⟹ h y = 0 ⟹ w = x @ b # y ⟹ t)
      ⟹ t"
  assumes "h w = 0" "set w ⊆ {a, b}"
  shows "w ∈ L"
  sorry

text ‹The final theorem›
corollary
  "L = M"
  using L_sub L_sup[OF h_first_zero h_2_split] unfolding h_def M_def by auto

end

Check File

theory Check
  imports Submission
begin

text ‹First task›
theorem L_sub: "L ⊆ M"
  by (rule Submission.L_sub)

text ‹Second task›
theorem h_first_zero:
  assumes "h (xs @ ys) < 0" "h xs ≥ 0"
  shows "∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
  using assms by (rule Submission.h_first_zero)

text ‹Third task›
theorem h_2_split:
  assumes "h w = 2" "∀v. v ≺ w ⟶ h v ≠ 1"
  obtains x y where "h x = 0" "h y = 0" "w = x @ b # y"
  using assms by (rule Submission.h_2_split)

text ‹Fourth task›
theorem L_sup:
  assumes h_first_zero:
    "⋀xs ys. h (xs @ ys) < 0 ⟹ 0 ≤ h xs ⟹ ∃as bs. ys = as @ bs ∧ h (xs @ as) = 0"
  assumes h_2_split:
    "⋀t w. h w = 2 ⟹ ∀v. v ≺ w ⟶ h v ≠ 1 ⟹ (⋀x y. h x = 0 ⟹ h y = 0 ⟹ w = x @ b # y ⟹ t)
      ⟹ t"
  assumes "h w = 0" "set w ⊆ {a, b}"
  shows "w ∈ L"
  using assms by (rule Submission.L_sup)

text ‹The final theorem›
corollary
  "L = M"
  using L_sub L_sup[OF h_first_zero h_2_split] unfolding h_def M_def by auto

end

Cookies disclaimer

A funky grammar

Resources

Definitions File

Template File

Check File

Definitions File

Template File

Definitions File

Template File

Check File

Definitions File

Template File

Check File

Definitions File

Template File

Check File