When programming in C# and ASP.NET, you may need to download an external webpage
and extract its metadata: specifically, the page title, the meta description,
and the meta keywords.
The method below will show you how to:
* download an external webpage
* parse the meta title
* parse the meta description
* parse the meta keywords
The parsing is done using regular expressions.
NOTE: Regular expressions only handle simply formatted tags, so this may not be the best way of doing this, but it is a solution that you can use.
Source code:
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;
namespace Tim.Examples.Classes
{
/// <summary>
/// Downloads a web page and extracts its title, meta keywords and meta
/// description using regular expressions.
/// </summary>
/// <remarks>
/// The expressions only recognise tags written exactly as
/// <c>&lt;title&gt;...&lt;/title&gt;</c> and
/// <c>&lt;meta name="..." content="..."&gt;</c> (case-insensitive); pages that
/// order the attributes differently or self-close the tag yield empty values.
/// </remarks>
public class WebMetaData
{
    // Request timeout in milliseconds (10 seconds).
    private const int RequestTimeoutMilliseconds = 10 * 1000;

    // Options shared by every pattern below.
    private const RegexOptions MetaRegexOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;

    // Each pattern captures the wanted text in group 1.
    private const string TitlePattern = "<title>([^<]*)</title>";
    private const string KeywordsPattern = "<meta name=\"keywords\" content=\"([^<]*)\">";
    private const string DescriptionPattern = "<meta name=\"description\" content=\"([^<]*)\">";

    /// <summary>Text of the page's title element; empty when no title was matched.</summary>
    public string metaTitle;

    /// <summary>Content of the "description" meta tag; empty when it was not matched.</summary>
    public string metaDescription;

    /// <summary>Content of the "keywords" meta tag; empty when it was not matched.</summary>
    public string metaKeywords;

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and fills
    /// <see cref="metaTitle"/>, <see cref="metaDescription"/> and
    /// <see cref="metaKeywords"/> from its HTML.
    /// </summary>
    /// <param name="url">Absolute HTTP or HTTPS address of the page.</param>
    /// <returns>
    /// <c>true</c> when the page was downloaded and parsed; <c>false</c> when
    /// the download produced no content or any error occurred. The fields are
    /// left unchanged when <c>false</c> is returned.
    /// </returns>
    public bool GetMetaTags(string url)
    {
        try
        {
            string html = AcquireHTML(url);

            // An empty document means the download failed (or there is
            // nothing to parse), so report failure explicitly.
            if (string.IsNullOrEmpty(html))
            {
                return false;
            }

            return GetMeta(html);
        }
        catch (Exception)
        {
            // Best effort: this method never throws to the caller.
            return false;
        }
    }

    /// <summary>
    /// Fetches the document at <paramref name="address"/> and returns its body as text.
    /// </summary>
    /// <param name="address">Address to request.</param>
    /// <returns>The response body, or an empty string when the request could not be completed.</returns>
    private string AcquireHTML(string address)
    {
        HttpWebResponse response = null;
        try
        {
            // Create and initialize the web request. The cast yields null for
            // non-HTTP schemes (for example file:), which cannot be handled here.
            HttpWebRequest request = WebRequest.Create(address) as HttpWebRequest;
            if (request == null)
            {
                return "";
            }

            request.UserAgent = "your-search-bot";
            request.KeepAlive = false;
            request.Timeout = RequestTimeoutMilliseconds;

            response = request.GetResponse() as HttpWebResponse;
            if (!request.HaveResponse || response == null)
            {
                return "";
            }

            // Dispose the stream and reader even when reading fails.
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream))
            {
                return reader.ReadToEnd();
            }
        }
        catch (Exception)
        {
            // Invalid address, network failure, timeout, read error...
            return "";
        }
        finally
        {
            // The response is still null when the request itself failed, so
            // guard the call instead of closing unconditionally.
            if (response != null)
            {
                response.Close();
            }
        }
    }

    /// <summary>
    /// Extracts the title, keywords and description from <paramref name="strIn"/>
    /// into the public fields.
    /// </summary>
    /// <param name="strIn">HTML source to search.</param>
    /// <returns><c>true</c> when parsing ran to completion; <c>false</c> when it threw.</returns>
    private bool GetMeta(string strIn)
    {
        try
        {
            // Groups[1].Value is an empty string when a pattern does not match.
            string title = Regex.Match(strIn, TitlePattern, MetaRegexOptions).Groups[1].Value;
            string keywords = Regex.Match(strIn, KeywordsPattern, MetaRegexOptions).Groups[1].Value;
            string description = Regex.Match(strIn, DescriptionPattern, MetaRegexOptions).Groups[1].Value;

            metaTitle = title;
            metaKeywords = keywords;
            metaDescription = description;
            return true;
        }
        catch (Exception)
        {
            // For example a null input; reported as a failed parse.
            return false;
        }
    }
}
}