#!/usr/bin/env bash
#
# Collect video-sitemap data from a list of pages.
#
# Usage: <this-script> URL_FILE
#
# URL_FILE contains one page URL per line. Every page is downloaded with wget
# and the content="..." values of its og:video (the entry that mentions "swf"),
# og:title, og:image and og:description meta tags are written, together with
# the page URL, as one tab-separated row.
#
# Output: "<basename of URL_FILE>video-sitemap-data.txt" in the current
# directory. The file is overwritten on every run and starts with a header row.
#
# Exit status: 1 when no filename is given or the file cannot be read.

set -u

# Check that the script was called with a filename - a file containing urls
# for pages with videos.
if [[ -z "${1:-}" ]]; then
  echo "need to call with filename" >&2
  exit 1
fi

if [[ ! -r "$1" ]]; then
  printf 'cannot read file: %s\n' "$1" >&2
  exit 1
fi

# Make the output filename unique by basing it on the file with the urls.
filename=$(basename -- "$1")
out_file="${filename}video-sitemap-data.txt"

#######################################
# Print the content="..." value of the first line of an HTML document that
# mentions a given meta property. Prints an empty line when nothing matches,
# so the caller always gets exactly one (possibly empty) field.
# Arguments:
#   $1 - HTML document text
#   $2 - fixed string the line must contain (e.g. "og:title")
#   $3 - optional second fixed string the line must also contain
# Outputs: the extracted value on stdout, with carriage returns removed and
#          tabs replaced by spaces so it cannot break the row layout.
#######################################
og_content() {
  local html=$1 property=$2 must_contain=${3:-}
  local matches

  matches=$(printf '%s\n' "$html" | grep -F -- "$property")
  if [[ -n "$must_contain" ]]; then
    matches=$(printf '%s\n' "$matches" | grep -F -- "$must_contain")
  fi

  # Only the first match is used; several matching tags (for example
  # og:image and og:image:width) would otherwise add extra columns.
  printf '%s\n' "$matches" \
    | head -n 1 \
    | sed -e 's/^.*content="//' -e 's/".*$//' \
    | tr -d '\r' \
    | tr '\t' ' '
}

# Header row for the results file. Column 1 is the URL read from URL_FILE,
# column 2 the og:video value, then title, thumbnail and description.
printf 'url\tpage\tTitle\tThumb\tDescription\n' > "$out_file"

# Loop through the url file. "read" without IFS= trims surrounding blanks from
# each URL; the "|| [[ -n ... ]]" part keeps a last line that has no newline.
while read -r line || [[ -n "$line" ]]; do
  line=${line%$'\r'}              # tolerate Windows (CRLF) line endings
  [[ -n "$line" ]] || continue    # skip blank lines

  # A failed download still produces a row (with empty fields) so the output
  # keeps one row per input URL; the failure is reported on stderr.
  if ! filecontent=$(wget -qO- -- "$line"); then
    printf 'warning: could not fetch %s\n' "$line" >&2
    filecontent=""
  fi

  video=$(og_content "$filecontent" "og:video" "swf")
  title=$(og_content "$filecontent" "og:title")
  thumb=$(og_content "$filecontent" "og:image")
  description=$(og_content "$filecontent" "og:description")

  # One printf emits the whole row, so every row has exactly five fields and
  # a terminating newline even when some tags are missing.
  printf '%s\t%s\t%s\t%s\t%s\n' \
    "$line" "$video" "$title" "$thumb" "$description"
done < "$1" >> "$out_file"
I'd be delighted to know if this helped someone else - why don't you drop me a note if it did?
This is one of the pages that I used for testing, just in case someone wants to test this:
http://video.dell.com/details.php?oid=RraTJsZDrsDKFAjNcj2WmTovA1ovugc-&c=us&l=en&s=bsd&p=learn
No comments:
Post a Comment