#!/bin/bash filepath=$1 dir="$(dirname $filepath)" filename="$(basename $filepath)" noext="${filename%.*}" TXT="$dir/$noext.txt" cp $filepath $TXT sed -i "" 's/<p>//g' $TXT sed -i "" 's/<\/p>//g' $TXT sed -i "" 's/<h1>/#### /g' $TXT sed -i "" 's/<h2>/### /g' $TXT sed -i "" 's/<h3>/## /g' $TXT sed -i "" 's/<h4>/* /g' $TXT sed -i "" 's/<\/h1>/ ####/g' $TXT sed -i "" 's/<\/h2>/ ###/g' $TXT sed -i "" 's/<\/h3>/ ##/g' $TXT sed -i "" 's/<\/h4>/ */g' $TXT #sed -i "" 's/<br.*>/\n/g' $TXT #uncomment to replace <br> and <br /> tags with newlines sed -i "" "s/’/'/g" $TXT sed -i "" "s/‘/'/g" $TXT sed -i "" 's/“/"/g' $TXT sed -i "" 's/”/"/g' $TXT sed -i "" 's/"/"/g' $TXT sed -i "" 's/→/->/g' $TXT sed -i "" 's/&/&/g' $TXT sed -i "" 's/–/-/g' $TXT sed -i "" 's/é/e/g' $TXT #Remove any remaining html tags that we don't care about sed -e 's/<[^>]*>//g' $TXT > bar.txt #Capitalize all the titles that we just added perl -i -pe 's/#(.+)#/#\U$1#/gi' bar.txt mv bar.txt $TXT