Workdocumentation 2023-03-10


Wolfgang Fahl

ceur-ws-stats.sh

#!/bin/bash
# WF 2020-06-02
# script to check CEUR-WS statistics 

#
# List all CEUR-WS volume directories below the current directory.
#
# Outputs: one path per line, as printed by find, for every directory
#          whose name starts with "Vol-".
#
volumes() {
  # Let find print the paths itself: looping over $(find ...) word-splits
  # and glob-expands the names, which breaks directories containing spaces.
  find . -name "Vol-*" -type d
}

#
# Count, for every HTML file below the current directory, the lines that
# match the given link.
#
# Arguments: $1 - link to look for (used as a grep pattern)
#            $2 - file to which one count per HTML file is appended
#
getLinkCounts() {
  local l_link="$1"
  local l_nums="$2"
  local l_html l_count
  # NUL-delimited find output keeps file names with spaces or glob
  # characters intact (a for-loop over $(find ...) would split them).
  while IFS= read -r -d '' l_html
  do
    # grep -c replaces "grep | wc -l"; a file grep cannot read counts as 0
    l_count=$(grep -c -- "$l_link" "$l_html")
    printf '%s\n' "${l_count:-0}" >> "$l_nums"
  done < <(find . -name "*.html" -print0)
}

#
# Print descriptive statistics for a file containing one number per line.
# Formulas: see https://stackoverflow.com/a/9789898/1497139
#
# Arguments: $1 - file with one numeric value per line
# Outputs:   a CSV header line followed by one CSV line with
#            N,sum,mean,variance,std,SEM,skewness,kurtosis
#            (variance divides by N; kurtosis has 3 subtracted).
#            Undefined values are printed as "nan": everything but N and
#            sum for empty input, skewness and kurtosis when all values
#            are equal.
#
calcStatistics() {
  local l_nums="$1"
  awk '
{
   total += $1;
   value[NR] = $1;
}
END {
    print "N,sum,mean,variance,std,SEM,skewness,kurtosis"
    if (NR == 0) {
        # no data: the mean would be a division by zero
        print "0,0,nan,nan,nan,nan,nan,nan"
        exit
    }
    mean = total/NR;
    # index loop instead of "for (k in value)": fixed summation order
    for (k = 1; k <= NR; k++)
    {
        delta = value[k]-mean;
        m2 += delta^2;
        m3 += delta^3;
        m4 += delta^4;
    }
    va = m2/NR; # variance
    sd = sqrt(va); # standard deviation
    if (sd > 0) {
        sk = (m3/NR)/sd^3; # skewness
        ku = (m4/NR)/sd^4-3; # standardized kurtosis
    } else {
        # all values equal: dividing by sd would be a division by zero
        sk = "nan";
        ku = "nan";
    }
    print NR "," total "," mean "," va "," sd "," sd/sqrt(NR) "," sk "," ku
}
' "$l_nums"
}

#
# Print the number of collected link counts and their statistics.
# The counts are cached: getLinkCounts is only run when the cache file
# does not exist yet.
#
# Arguments: $1 - cache file for the link counts
#                 (optional, default: /tmp/ceur-ws-staticlinks.txt)
# Outputs:   the "wc -l" line for the cache file followed by the output
#            of calcStatistics for it
#
linkStats() {
  # local: a global "nums" would be shared with other functions
  local l_nums="${1:-/tmp/ceur-ws-staticlinks.txt}"
  if [[ ! -f "$l_nums" ]]
  then
    echo "getting link counts"
    getLinkCounts "http://ceur-ws.org" "$l_nums"
  fi
  wc -l "$l_nums"
  calcStatistics "$l_nums"
}

#
# Check getting the versions: collect every line containing CEURVERSION
# from the index.html of each volume and print how many were found.
#
# Outputs: the "wc -l" line for /tmp/ceur-ws-versions-grep.txt, the file
#          that holds the collected lines
#
versioncheck() {
  local l_tmp=/tmp/ceur-ws-versions-grep.txt
  local l_vol
  # Truncate instead of "rm": no error message when the file does not
  # exist yet, and wc still has a file to count when nothing matches.
  : > "$l_tmp"
  while IFS= read -r l_vol
  do
    grep CEURVERSION "$l_vol/index.html" >> "$l_tmp"
  done < <(volumes)
  wc -l "$l_tmp"
}
#
# Print a MediaWiki table with each CEURVERSION value and the number of
# volumes using it, followed by a sum row.
# The version is taken from the CEURVERSION line within the first five
# lines of a volume's index.html: the second "="-separated field, cut at
# the first space; results still containing "-->" are dropped.
#
# Outputs: wiki table markup on stdout; the extracted versions are kept
#          in /tmp/ceur-ws-versions.txt
#
versionlist() {
  local l_tmp=/tmp/ceur-ws-versions.txt
  local l_vol
  # Truncate instead of "rm": no error message when the file does not
  # exist yet, and sort still has a file to read when nothing matches.
  : > "$l_tmp"
  while IFS= read -r l_vol
  do
    head -5 "$l_vol/index.html" \
      | grep CEURVERSION \
      | cut -f2 -d'=' \
      | cut -f1 -d" " \
      | sed '/-->/d' \
      | awk '{$1=$1;print}' >> "$l_tmp"
  done < <(volumes)
  # uniq -c yields "<count> <version>" lines for the table rows
  sort "$l_tmp" | uniq -c | awk '
BEGIN {
  # \" instead of the hex escape \x22, which not every awk understands
  printf("{| class=\"wikitable\"\n")
  print "|-"
  print "! # !! version !! volumes"
}
{
  count=$1
  sum+=count
  version=$2
  print "|-"
  printf ("| %2d || %s || %4d \n",NR,version,count)
}
END {
  print "|-"
  printf ("|     || %s || %4d \n","sum",sum)
  print "|}"
}
'
}

#
# Count the volumes: store the volume list and print its line count.
#
# Outputs: the "wc -l" line for /tmp/ceur-ws-volumes.txt, which holds
#          one volume directory per line
#
volcount() {
  local l_nums=/tmp/ceur-ws-volumes.txt
  # Overwriting in one redirect replaces "rm" plus an append loop: no
  # error when the file is missing, and names with spaces stay on one line.
  volumes > "$l_nums"
  wc -l "$l_nums"
}

#
# Search the index.html of every volume for a pattern.
#
# Arguments: $1 - grep pattern to search for
# Outputs:   for each volume with at least one match a single line
#            "<volume>:<matching lines>", with all runs of whitespace
#            (including line breaks) collapsed to single spaces
#
grepContent() {
  local l_pattern="$1"
  local l_vol l_found
  local -a l_words
  local IFS=$' \t\n'
  while IFS= read -r l_vol
  do
    if l_found=$(grep -- "$l_pattern" "$l_vol/index.html")
    then
      # read -a splits into words like an unquoted echo would, but
      # without pathname expansion: a "*" in the matched text stays a
      # "*". read reports failure at end of input, hence "|| true".
      read -r -d '' -a l_words <<< "$l_vol:$l_found" || true
      printf '%s\n' "${l_words[*]}"
    fi
  done < <(volumes)
}

#
# List the PDF files of all volumes, one path per line
# (pipe the output into "wc -l" to get the number of papers).
#
# The volume list is read from /tmp/ceur-ws-volumes.txt and is created
# with volcount when that file does not exist yet.
#
papercount() {
  local l_nums=/tmp/ceur-ws-volumes.txt
  local l_vol
  if [[ ! -f "$l_nums" ]]
  then
    # volcount writes the volume list; its own count line is discarded
    # so that stdout only carries PDF paths
    volcount > /dev/null
  fi
  while IFS= read -r l_vol
  do
    find "$l_vol" -name "*.pdf"
  done < "$l_nums"
}


# Entry point: the functions above are only run when called here.
# Uncomment the statistics you would like to apply; versionlist is the
# one that is currently enabled.
#grepContent "Christoph Lange"
#volcount
#papercount
versionlist
#versioncheck
#linkStats

ceur-ws-stats.sh

#!/bin/bash
# WF 2020-06-02
# script to check CEUR-WS statistics 

#
# List all CEUR-WS volume directories below the current directory.
#
# Outputs: one path per line, as printed by find, for every directory
#          whose name starts with "Vol-".
#
volumes() {
  # Let find print the paths itself: looping over $(find ...) word-splits
  # and glob-expands the names, which breaks directories containing spaces.
  find . -name "Vol-*" -type d
}

#
# Count, for every HTML file below the current directory, the lines that
# match the given link.
#
# Arguments: $1 - link to look for (used as a grep pattern)
#            $2 - file to which one count per HTML file is appended
#
getLinkCounts() {
  local l_link="$1"
  local l_nums="$2"
  local l_html l_count
  # NUL-delimited find output keeps file names with spaces or glob
  # characters intact (a for-loop over $(find ...) would split them).
  while IFS= read -r -d '' l_html
  do
    # grep -c replaces "grep | wc -l"; a file grep cannot read counts as 0
    l_count=$(grep -c -- "$l_link" "$l_html")
    printf '%s\n' "${l_count:-0}" >> "$l_nums"
  done < <(find . -name "*.html" -print0)
}

#
# Print descriptive statistics for a file containing one number per line.
# Formulas: see https://stackoverflow.com/a/9789898/1497139
#
# Arguments: $1 - file with one numeric value per line
# Outputs:   a CSV header line followed by one CSV line with
#            N,sum,mean,variance,std,SEM,skewness,kurtosis
#            (variance divides by N; kurtosis has 3 subtracted).
#            Undefined values are printed as "nan": everything but N and
#            sum for empty input, skewness and kurtosis when all values
#            are equal.
#
calcStatistics() {
  local l_nums="$1"
  awk '
{
   total += $1;
   value[NR] = $1;
}
END {
    print "N,sum,mean,variance,std,SEM,skewness,kurtosis"
    if (NR == 0) {
        # no data: the mean would be a division by zero
        print "0,0,nan,nan,nan,nan,nan,nan"
        exit
    }
    mean = total/NR;
    # index loop instead of "for (k in value)": fixed summation order
    for (k = 1; k <= NR; k++)
    {
        delta = value[k]-mean;
        m2 += delta^2;
        m3 += delta^3;
        m4 += delta^4;
    }
    va = m2/NR; # variance
    sd = sqrt(va); # standard deviation
    if (sd > 0) {
        sk = (m3/NR)/sd^3; # skewness
        ku = (m4/NR)/sd^4-3; # standardized kurtosis
    } else {
        # all values equal: dividing by sd would be a division by zero
        sk = "nan";
        ku = "nan";
    }
    print NR "," total "," mean "," va "," sd "," sd/sqrt(NR) "," sk "," ku
}
' "$l_nums"
}

#
# Print the number of collected link counts and their statistics.
# The counts are cached: getLinkCounts is only run when the cache file
# does not exist yet.
#
# Arguments: $1 - cache file for the link counts
#                 (optional, default: /tmp/ceur-ws-staticlinks.txt)
# Outputs:   the "wc -l" line for the cache file followed by the output
#            of calcStatistics for it
#
linkStats() {
  # local: a global "nums" would be shared with other functions
  local l_nums="${1:-/tmp/ceur-ws-staticlinks.txt}"
  if [[ ! -f "$l_nums" ]]
  then
    echo "getting link counts"
    getLinkCounts "http://ceur-ws.org" "$l_nums"
  fi
  wc -l "$l_nums"
  calcStatistics "$l_nums"
}

#
# Check getting the versions: collect every line containing CEURVERSION
# from the index.html of each volume and print how many were found.
#
# Outputs: the "wc -l" line for /tmp/ceur-ws-versions-grep.txt, the file
#          that holds the collected lines
#
versioncheck() {
  local l_tmp=/tmp/ceur-ws-versions-grep.txt
  local l_vol
  # Truncate instead of "rm": no error message when the file does not
  # exist yet, and wc still has a file to count when nothing matches.
  : > "$l_tmp"
  while IFS= read -r l_vol
  do
    grep CEURVERSION "$l_vol/index.html" >> "$l_tmp"
  done < <(volumes)
  wc -l "$l_tmp"
}
#
# Print a MediaWiki table with each CEURVERSION value and the number of
# volumes using it, followed by a sum row.
# The version is taken from the CEURVERSION line within the first five
# lines of a volume's index.html: the second "="-separated field, cut at
# the first space; results still containing "-->" are dropped.
#
# Outputs: wiki table markup on stdout; the extracted versions are kept
#          in /tmp/ceur-ws-versions.txt
#
versionlist() {
  local l_tmp=/tmp/ceur-ws-versions.txt
  local l_vol
  # Truncate instead of "rm": no error message when the file does not
  # exist yet, and sort still has a file to read when nothing matches.
  : > "$l_tmp"
  while IFS= read -r l_vol
  do
    head -5 "$l_vol/index.html" \
      | grep CEURVERSION \
      | cut -f2 -d'=' \
      | cut -f1 -d" " \
      | sed '/-->/d' \
      | awk '{$1=$1;print}' >> "$l_tmp"
  done < <(volumes)
  # uniq -c yields "<count> <version>" lines for the table rows
  sort "$l_tmp" | uniq -c | awk '
BEGIN {
  # \" instead of the hex escape \x22, which not every awk understands
  printf("{| class=\"wikitable\"\n")
  print "|-"
  print "! # !! version !! volumes"
}
{
  count=$1
  sum+=count
  version=$2
  print "|-"
  printf ("| %2d || %s || %4d \n",NR,version,count)
}
END {
  print "|-"
  printf ("|     || %s || %4d \n","sum",sum)
  print "|}"
}
'
}

#
# Count the volumes: store the volume list and print its line count.
#
# Outputs: the "wc -l" line for /tmp/ceur-ws-volumes.txt, which holds
#          one volume directory per line
#
volcount() {
  local l_nums=/tmp/ceur-ws-volumes.txt
  # Overwriting in one redirect replaces "rm" plus an append loop: no
  # error when the file is missing, and names with spaces stay on one line.
  volumes > "$l_nums"
  wc -l "$l_nums"
}

#
# Search the index.html of every volume for a pattern.
#
# Arguments: $1 - grep pattern to search for
# Outputs:   for each volume with at least one match a single line
#            "<volume>:<matching lines>", with all runs of whitespace
#            (including line breaks) collapsed to single spaces
#
grepContent() {
  local l_pattern="$1"
  local l_vol l_found
  local -a l_words
  local IFS=$' \t\n'
  while IFS= read -r l_vol
  do
    if l_found=$(grep -- "$l_pattern" "$l_vol/index.html")
    then
      # read -a splits into words like an unquoted echo would, but
      # without pathname expansion: a "*" in the matched text stays a
      # "*". read reports failure at end of input, hence "|| true".
      read -r -d '' -a l_words <<< "$l_vol:$l_found" || true
      printf '%s\n' "${l_words[*]}"
    fi
  done < <(volumes)
}

#
# List the PDF files of all volumes, one path per line
# (pipe the output into "wc -l" to get the number of papers).
#
# The volume list is read from /tmp/ceur-ws-volumes.txt and is created
# with volcount when that file does not exist yet.
#
papercount() {
  local l_nums=/tmp/ceur-ws-volumes.txt
  local l_vol
  if [[ ! -f "$l_nums" ]]
  then
    # volcount writes the volume list; its own count line is discarded
    # so that stdout only carries PDF paths
    volcount > /dev/null
  fi
  while IFS= read -r l_vol
  do
    find "$l_vol" -name "*.pdf"
  done < "$l_nums"
}


# Entry point: the functions above are only run when called here.
# Uncomment the statistics you would like to apply; versionlist is the
# one that is currently enabled.
#grepContent "Christoph Lange"
#volcount
#papercount
versionlist
#versioncheck
#linkStats
🖨 🚪