Text processing with awk, sed, and cut

awk, sed, and cut are tools for processing text data on Linux.

awk

# write the image list to images.txt; the table below shows sample contents of that file
openstack image list > images.txt
+----------------+--------------+-----------+
| ID             | Name         | Status    |
+----------------+--------------+-----------+
| 9789-4b3f-95b8 | windows-2012 | active    |
| 27a3-46c2-b821 | windows-2016 | active    |
| f45a-4c89-a69f | windows-2019 | active    |
| 012f-4751-822a | windows-2019 | active    |
| 68c8-4d4b-a5b5 | windows-2022 | active    |
+----------------+--------------+-----------+

extract only windows-2016 image id

# on a bash or zsh shell
# awk splits each row on whitespace: $1 is the leading "|", $2 the ID, $4 the Name
ID=$(awk '$4=="windows-2016" {print $2}' images.txt)
# quoted so that several matching IDs stay on separate lines
echo "$ID"
27a3-46c2-b821

# on a fish shell
# NOTE: $(...) command substitution needs fish 3.4 or newer; older versions only accept (...)
set ID $(awk '$4=="windows-2016" {print $2}' images.txt)
echo $ID
27a3-46c2-b821

contains string

# keep only the pods whose name (column 1) matches the pattern and print that name
kubectl get pods -n ingress-nginx |
  awk '{ if ($1 ~ /ingress-nginx-controller/) print $1 }'

using a variable

# on a bash or zsh shell
IMAGE_NAME="windows-2022"
# -v copies the shell value into the awk variable image_name; the expansion is
# quoted so a name containing spaces still reaches awk as one argument
ID=$(awk -v image_name="$IMAGE_NAME" '$4==image_name {print $2}' images.txt)
echo "$ID"
68c8-4d4b-a5b5

# on a fish shell
set IMAGE_NAME 'windows-2022'
# -v copies the shell value into the awk variable image_name;
# images.txt is the same file the other examples read
set ID $(awk -v image_name=$IMAGE_NAME '$4==image_name {print $2}' images.txt)
echo $ID
68c8-4d4b-a5b5

find a process (middleman) and kill it

# find a process by the exact path in column 8 (CMD) of `ps -ef` and kill it.
# awk does the matching itself, so the extra `grep middleman` is not needed.
PIDS=$(ps -ef | awk '$8=="/usr/local/var/rbenv/versions/2.3.0/bin/middleman" {print $2}')
# kill prints a usage error when it gets no PID, so only call it on a match
if [ -n "$PIDS" ]; then
  kill -9 $PIDS   # unquoted on purpose (bash): one argument per PID
fi
# to find another process and kill it, change the path compared with $8

cut

list status column

# read -r keeps backslashes literal; tr -s squeezes the column padding so every
# column is separated by exactly one space, which is what cut -d' ' needs.
# After rev, field 1 is the closing "|" and field 2 is the status (reversed),
# so -f2 is the status column; the second rev turns the text back around.
# -s skips the border lines, which contain no space at all.
while read -r line; do
  [ -z "$line" ] && continue
  echo "$line" | tr -s ' ' | rev | cut -s -d' ' -f2 | rev
done < images.txt

read all image names

# read -r keeps backslashes literal; tr -s squeezes the column padding so every
# column is separated by exactly one space, which is what cut -d' ' needs.
# Field 4 is the Name column (1 is "|", 2 the ID, 3 "|", 4 the Name);
# -s skips the border lines, which contain no space at all.
while read -r line; do
  [ -z "$line" ] && continue
  echo "$line" | tr -s ' ' | cut -s -d' ' -f4
done < images.txt

get lines count

# cat -n numbers every line; the number in front of the last line is the line count
cat -n images.txt | tail -n 1 | cut -f1
# the same count from awk: in the END block NR is the number of lines read
awk 'END {print NR}' images.txt
# NF is something else: the number of fields, printed here once per line
awk '{print NF}' images.txt

more presentable

# one output line per input line: line number, four dashes, number of fields
awk '{ printf "%d----%d\n", NR, NF }' images.txt
# 1----1
# 2----7
# 3----1
# 4----7
# 5----7
# 6----7
# 7----7
# 8----7
# 9----1

column count for each line

# print the number of whitespace-separated columns of every non-empty line
while read -r line; do
  [ -z "$line" ] && continue
  # awk reports NF, the field count of the single line it is given
  COUNT=$(echo "$line" | awk '{print NF}')
  echo "$COUNT"
done < images.txt
# print the ID of every image row; NF>1 skips the border lines (a single field,
# so $2 would be empty) and $2!="ID" skips the header row
awk 'NF>1 && $2!="ID" {print "Image", $2, "\n"}' images.txt

awk with conditions

awk '{
  border = "+----------------+--------------+-----------+"
  # a ";" straight after if (...) ends the if with an empty statement, so the
  # print has to sit inside the braces that belong to the if
  if ($1 != border) {
    print "ID", $2, "NAME", $4, "STATUS", $6, "\n"
  }
}' images.txt

use a file with awk

# The delimiter is quoted ('EOF') so the shell copies the text literally;
# with a bare EOF the shell would expand $2, $4 and $6 before awk ever saw them.
cat << 'EOF' > image.awk
{
  id = $2
  name = $4
  status = $6
  border = "+----------------+--------------+-----------+"
  # skip the header row and the border lines, print every image row
  if (id != "ID" && $1 != border) {
    print "ID", id, "NAME", name, "STATUS", status, "\n"
  }
}
EOF

awk -f image.awk images.txt

using one file for the input and another file for the awk program

create a text file called kids.txt

# columns: name, age, gender, city (separated by runs of spaces)
cat > kids.txt << 'EOF'
FEMIDA    8   Female      Berlin
FARZANA   3   Female      Chennai
YUSRA     6   Female      Howrah
AFIRA     10  Female      Thanjavur
FAZILA    4   Female      Thanjavur
WAFIQAH   13  Female      Madurai
FAZIL     10  Male        Santhamanikam
FAIZAL    8   Male        Santhamanikam
FEMINA    4   Female      Santhamanikam
EOF

create an awk command file called language.awk

# The delimiter is quoted ('EOF') so the shell leaves $1 alone; with a bare EOF
# the shell would replace it with its own first positional parameter (normally
# empty) and awk would receive "name=;".
cat << 'EOF' > language.awk
{
  name = $1;

  # map each known name to a language; anything else becomes "None"
  if (name == "FEMIDA") language = "German";
  else if (name == "FARZANA") language = "English";
  else if (name == "YUSRA") language = "German";
  else if (name == "AFIRA") language = "English";
  else if (name == "FAZILA") language = "English";
  else if (name == "WAFIQAH") language = "English";
  else if (name == "FAZIL") language = "English";
  else if (name == "FAIZAL") language = "English";
  else language = "None";

  if (language != "None") print "The favorite language of ", name, "is ", language;
  else print "No person found";
}
EOF

run it

# -f reads the awk program from language.awk; kids.txt is the input it is applied to
awk -f language.awk kids.txt

prints last column/field in each line

# $NF is the last field; lines with fewer than two fields are skipped
awk '{ if (NF > 1) print $NF }' kids.txt

prints total number of rows/lines in the file

# NR is the number of records read; in the END block that is the total line count
awk 'END {print NR}' kids.txt

similar commands to achieve the same thing

# wc -l counts the newline characters read from standard input
wc -l < kids.txt
# -n suppresses the normal output; $= prints the line number of the last line
sed -n '$=' kids.txt

# install GNU awk (gawk) with Homebrew
brew install gawk

get first field of a file

# kids.txt is separated by spaces, not colons, so the default field separator
# is used; with -F: the whole line would count as field 1
gawk '{ print $1 }' kids.txt

create a file called marks.txt

# columns: subject, mark
cat > marks.txt << 'EOF'
Tamil       95
English     100
Mathematics 100
Science     98
Social      98
EOF

prints the entire line if text found

# ^ anchors the pattern to the start of the line
grep '^Tamil' marks.txt

# add up column 2 over all lines, then report the total once at the end
gawk '{ total += $2 } END { print "You scored ", total, "out of 500 marks" }' marks.txt

output

You scored  491 out of 500 marks

Text manipulation with cut

-f3- prints column 3 onwards including column 3

# "3-" selects field 3 and every field after it
cut -d ' ' -f3- <<< "ABC DEF MNO PQR ZSA"
MNO PQR ZSA

-f3 prints column 3 only

# the -d flag sets the delimiter; in this case the delimiter is a space.
echo "ABC DEF MNO PQR ZSA" | cut -d ' ' -f3
# output
MNO

use awk to extract

_input=kids.txt
# awk already walks the file line by line, so a single awk call replaces the
# shell loop that started a new awk process for every line.
# index($0,$4) is the position where the text of field 4 first appears in the
# line; this assumes the same text does not also appear earlier in the line.
awk '{print substr($0, index($0,$4))}' "${_input}"

use cut to extract

# print field 3 and everything after it, using a space as the delimiter
printf '%s\n' "ONE TWO THREE FOUR FIVE" | cut -d ' ' -f3-
THREE FOUR FIVE

sed Tool

replace just the text

# -i.bak edits marks.txt in place and keeps the original as marks.txt.bak.
# Written as -ie, sed takes the "e" as the backup suffix and leaves a stray
# marks.txte behind; -i.bak -e is accepted by both GNU and BSD (macOS) sed.
sed -i.bak -e 's/Tamil/German/g' marks.txt

replace the entire line

# .*German.* matches the whole line that contains German, so the entire line is replaced.
# -i.bak keeps the original as marks.txt.bak; with -ie sed would take the "e"
# as the backup suffix and create marks.txte instead.
sed -i.bak -e 's/.*German.*/Tamil  90/g' marks.txt

search something and write if not found

# update the English line only when one exists
# (-i.bak keeps a backup; -ie would make sed use "e" as the backup suffix)
if grep -q '^English' marks.txt; then
  sed -i.bak -e 's/^English.*/English 95/' marks.txt
fi

# update the English line when it exists, otherwise append one.
# if/else replaces `a && b || c`, where c would also run whenever b (sed) fails.
if grep -q '^English' marks.txt; then
  sed -i.bak -e 's/^English.*/English 100/' marks.txt
else
  echo 'English   95' >> marks.txt
fi

@ as the separator: finds the line starting with Social and replaces it with 'SocialScience   98'

# any character can follow s as the separator; here it is @ instead of /.
# -i.bak keeps the original as marks.txt.bak; with -ie sed would take the "e"
# as the backup suffix and create marks.txte instead.
sed -i.bak -e 's@^Social.*@SocialScience   98@g' marks.txt

looks for Arabic in marks.txt and appends it if it is not found

# append the Arabic line only when grep finds no line containing "Arabic"
if ! grep -q "Arabic" marks.txt; then
  echo "Arabic  99" >> marks.txt
fi