Understanding your competitors is key to dominating the search engine results pages (SERPs). This Python script takes competitor analysis to the next level by not only extracting crucial on-page elements like titles, meta descriptions, H1 tags, and word counts but also performing n-gram analysis to identify the most frequently used words and phrases on top-ranking pages. Whether you’re crafting content or optimising your SEO strategy, this tool equips you with actionable insights.

In this detailed guide, you’ll find:

- The full Python script, ready to copy and run
- A breakdown of each component
- Step-by-step instructions for running it yourself

Full Python Script

import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import re
import csv
from collections import Counter
from nltk import ngrams

def extract_ngrams(text, n):
    # Lowercase, tokenise on word characters, then count every run of n consecutive words
    words = re.findall(r"\w+", text.lower())
    return Counter(ngrams(words, n))

def fetch_serp_data(keyword, user_domain, top_n=5):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # Let requests build the query string so multi-word keywords are encoded correctly
    response = requests.get(
        "https://www.google.com.au/search",
        params={"q": keyword, "num": top_n + 10, "hl": "en", "gl": "au"},
        headers=headers,
        timeout=10,
    )
    response.raise_for_status()  # fail loudly if Google blocks the request (e.g. HTTP 429)
    soup = BeautifulSoup(response.text, 'html.parser')

    results = []
    user_result = None

    # Google's result markup changes often; update the .tF2Cxc and .IsZvec
    # selectors if the script suddenly returns nothing
    for result in soup.select(".tF2Cxc"):
        link_tag = result.select_one("a")
        if not link_tag or not link_tag.get("href"):
            continue
        link = link_tag["href"]
        domain = urlparse(link).netloc

        title = result.select_one("h3").text if result.select_one("h3") else ""
        snippet = result.select_one(".IsZvec").text if result.select_one(".IsZvec") else ""

        page_details = fetch_page_details(link)

        entry = {
            "position": len(results) + 1,
            "url": link,
            "title": title,
            "snippet": snippet,
            **page_details
        }

        # Set your own listing aside instead of discarding it, so main() can
        # still report how your page compares against the competitors
        if user_domain in domain:
            if user_result is None:
                user_result = entry
            continue

        results.append(entry)
        if len(results) >= top_n:
            break

    return results, user_result

def fetch_page_details(url):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # A single slow or blocked page should not crash the whole run
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        return {
            "meta_description": "",
            "h1_tags": "",
            "word_count": 0,
            "unigrams": "",
            "bigrams": "",
            "trigrams": "",
        }

    soup = BeautifulSoup(response.text, 'html.parser')

    # Drop script/style content so it doesn't inflate word counts and n-grams
    for tag in soup(["script", "style", "noscript"]):
        tag.decompose()

    meta_tag = soup.find("meta", attrs={"name": "description"})
    meta_desc = meta_tag.get("content", "") if meta_tag else ""

    h1_tags = [h1.text.strip() for h1 in soup.find_all("h1")]

    visible_text = soup.get_text(separator=' ')
    word_count = len(re.findall(r"\w+", visible_text))

    unigrams = extract_ngrams(visible_text, 1)
    bigrams = extract_ngrams(visible_text, 2)
    trigrams = extract_ngrams(visible_text, 3)

    def format_top(counter, limit=10):
        # Render e.g. ('keyword', 'research'): 12 as "keyword research (12)"
        return "; ".join(f"{' '.join(gram)} ({count})" for gram, count in counter.most_common(limit))

    return {
        "meta_description": meta_desc,
        "h1_tags": "; ".join(h1_tags),
        "word_count": word_count,
        "unigrams": format_top(unigrams),
        "bigrams": format_top(bigrams),
        "trigrams": format_top(trigrams),
    }

def save_to_csv(data, filename):
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        writer = csv.writer(output_file)
        writer.writerow(["Keyword", "Position", "URL", "Title", "Meta Description", "H1 Tags", "Word Count", "Top Unigrams", "Top Bigrams", "Top Trigrams"])

        for row in data:
            writer.writerow(row)

def main():
    keywords = input("Enter keywords (comma-separated): ").split(',')
    user_domain = input("Enter your domain (e.g., example.com): ").strip()

    all_results = []

    for keyword in keywords:
        keyword = keyword.strip()
        print(f"\nAnalyzing keyword: {keyword}\n")

        serp_results, user_result = fetch_serp_data(keyword, user_domain)
        for result in serp_results:
            all_results.append([
                keyword,
                result["position"],
                result["url"],
                result["title"],
                result["meta_description"],
                result["h1_tags"],
                result["word_count"],
                result["unigrams"],
                result["bigrams"],
                result["trigrams"]
            ])

        # fetch_serp_data hands back your own listing separately; the previous
        # lookup could never find it because your domain was skipped entirely
        if user_result:
            all_results.append([
                keyword,
                "Your Domain",
                user_result["url"],
                user_result["title"],
                user_result["meta_description"],
                user_result["h1_tags"],
                user_result["word_count"],
                user_result["unigrams"],
                user_result["bigrams"],
                user_result["trigrams"]
            ])
        else:
            # Your domain did not appear in the parsed results for this keyword
            all_results.append([keyword, "Your Domain"] + ["No Result"] * 8)

    save_to_csv(all_results, "serp_analysis_results_with_ngrams.csv")
    print("\nResults saved to serp_analysis_results_with_ngrams.csv")

if __name__ == "__main__":
    main()

Detailed Component Breakdown

1. Extracting N-grams

Purpose: The extract_ngrams function identifies the most common sequences of words (unigrams, bigrams, trigrams) on each page. These reveal common themes and keyword patterns competitors use in their content.
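
As a quick illustration, here is the helper applied to a short string; the counter keys are tuples of words:

import re
from collections import Counter
from nltk import ngrams

def extract_ngrams(text, n):
    words = re.findall(r"\w+", text.lower())
    return Counter(ngrams(words, n))

bigrams = extract_ngrams("keyword research tools make keyword research faster", 2)
print(bigrams.most_common(1))
# [(('keyword', 'research'), 2)]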

2. Fetching SERP Data

Purpose: This function sends a query to Google and collects data about the top 5 organic results for each keyword. Results from your own domain are set aside rather than counted as competitors, so the analysis stays focused while still letting you benchmark your own page.
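
A minimal call looks like this (the keyword and domain are placeholders, and Google may block automated requests or change its markup, so treat the output as best-effort):

competitors, own_result = fetch_serp_data("best running shoes", "example.com", top_n=3)
for entry in competitors:
    print(entry["position"], entry["url"], entry["word_count"])
if own_result:
    print("Your page also ranks:", own_result["url"])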

3. Fetching Page Details

Purpose: For each result, the fetch_page_details function collects detailed on-page data:

- Meta description
- All H1 tags on the page
- Word count of the visible text
- The ten most frequent unigrams, bigrams, and trigrams
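
Assuming the request succeeds, the returned dictionary looks roughly like this (the URL and values are illustrative):

details = fetch_page_details("https://example.com/some-post")
print(details["word_count"])   # e.g. 1842
print(details["h1_tags"])      # "Main heading; Secondary heading"
print(details["bigrams"])      # "keyword research (14); on page (9); ..."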

4. Saving Results to CSV

Purpose: This ensures all results are exported to a CSV file for easy access. The file includes:

- Keyword and SERP position
- URL, title, and meta description
- H1 tags and word count
- Top unigrams, bigrams, and trigrams
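
If you want to slice the export further, it reads straight back in with the standard library; the column names below follow the header row that save_to_csv writes:

import csv

with open("serp_analysis_results_with_ngrams.csv", newline='', encoding='utf-8') as f:
    for row in csv.DictReader(f):
        if row["Position"] == "Your Domain":
            print(row["Keyword"], "->", row["URL"])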


How to Use

  1. Install Dependencies: Ensure Python 3.x is installed, then install the required libraries by running: pip install requests beautifulsoup4 nltk
  2. Save the Script: Save the script to a file, e.g., serp_analysis_with_ngrams.py.
  3. Run the Script: Open a terminal, navigate to the file location, and execute: python serp_analysis_with_ngrams.py. Enter the keywords (comma-separated) and your domain as prompted. (A non-interactive variant is sketched after this list.)
  4. View the Results: Open the generated file serp_analysis_results_with_ngrams.csv to review detailed insights for each keyword.
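
If you would rather skip the prompts, the script's functions can be driven directly; a minimal sketch, assuming the keywords and domain below are replaced with your own:

keywords = ["best running shoes", "trail running shoes"]  # placeholders
rows = []
for kw in keywords:
    competitors, own = fetch_serp_data(kw, "example.com")
    for r in competitors:
        rows.append([kw, r["position"], r["url"], r["title"],
                     r["meta_description"], r["h1_tags"], r["word_count"],
                     r["unigrams"], r["bigrams"], r["trigrams"]])
save_to_csv(rows, "batch_results.csv")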

Why This Script is Essential

Manual competitor research is slow and easy to skew. This script automates it: for every keyword you care about, it pulls the on-page elements that matter (titles, meta descriptions, H1 tags, word counts) from the pages that actually rank, and its n-gram analysis surfaces the words and phrases those pages lean on most, giving you concrete targets for your own content and on-page optimisation.
