Grab tour dates from the George Birge website and display them on a map

Start by importing everything we need

pip install selenium
Requirement already satisfied: selenium in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (4.20.0)
Requirement already satisfied: urllib3[socks]<3,>=1.26 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from selenium) (1.26.11)
Requirement already satisfied: typing_extensions>=4.9.0 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from selenium) (4.11.0)
Requirement already satisfied: trio-websocket~=0.9 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from selenium) (0.11.1)
Requirement already satisfied: certifi>=2021.10.8 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from selenium) (2022.9.24)
Requirement already satisfied: trio~=0.17 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from selenium) (0.25.0)
Requirement already satisfied: attrs>=23.2.0 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from trio~=0.17->selenium) (23.2.0)
Requirement already satisfied: idna in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from trio~=0.17->selenium) (3.3)
Requirement already satisfied: sniffio>=1.3.0 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from trio~=0.17->selenium) (1.3.1)
Requirement already satisfied: sortedcontainers in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from trio~=0.17->selenium) (2.4.0)
Requirement already satisfied: exceptiongroup in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from trio~=0.17->selenium) (1.2.1)
Requirement already satisfied: outcome in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from trio~=0.17->selenium) (1.3.0.post0)
Requirement already satisfied: wsproto>=0.14 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from trio-websocket~=0.9->selenium) (1.2.0)
Requirement already satisfied: PySocks!=1.5.7,<2.0,>=1.5.6 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from urllib3[socks]<3,>=1.26->selenium) (1.7.1)
Requirement already satisfied: h11<1,>=0.9.0 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from wsproto>=0.14->trio-websocket~=0.9->selenium) (0.14.0)
Note: you may need to restart the kernel to use updated packages.
pip install folium
Requirement already satisfied: folium in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (0.16.0)
Requirement already satisfied: xyzservices in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from folium) (2024.4.0)
Requirement already satisfied: branca>=0.6.0 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from folium) (0.7.2)
Requirement already satisfied: numpy in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from folium) (1.26.4)
Requirement already satisfied: requests in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from folium) (2.28.1)
Requirement already satisfied: jinja2>=2.9 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from folium) (3.1.4)
Requirement already satisfied: MarkupSafe>=2.0 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from jinja2>=2.9->folium) (2.0.1)
Requirement already satisfied: certifi>=2017.4.17 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from requests->folium) (2022.9.24)
Requirement already satisfied: charset-normalizer<3,>=2 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from requests->folium) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from requests->folium) (3.3)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from requests->folium) (1.26.11)
Note: you may need to restart the kernel to use updated packages.
pip install geopy
Requirement already satisfied: geopy in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (2.4.1)
Requirement already satisfied: geographiclib<3,>=1.52 in /Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages (from geopy) (2.0)
Note: you may need to restart the kernel to use updated packages.
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
import os
import folium
from geopy.geocoders import Nominatim
/Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages/pandas/core/computation/expressions.py:21: UserWarning: Pandas requires version '2.8.4' or newer of 'numexpr' (version '2.8.3' currently installed).
  from pandas.core.computation.check import NUMEXPR_INSTALLED
/Users/palmerjones/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed).
  from pandas.core import (
from bs4 import BeautifulSoup
import re
from datetime import datetime
import random
import csv

All of the variables we need to run the program

# URL of the website to scrape
url = 'https://www.georgebirge.com/tour'

# The csv file that we're saving all the performances to, and reading from
file_name = "performances_GB.csv"

Do the web scraping

# Initialize Safari WebDriver
driver = webdriver.Safari()
driver.implicitly_wait(10) # wait up to 10 seconds for elements to appear before giving up

# Open the URL in the browser
driver.get(url)

# define performance class
class Performance:
    def __init__(self, raw_html, raw_text):
        self.raw_html = raw_html
        self.raw_text = raw_text
        
# define performances list of objects
performances = []

# Find all tour-date cards (div elements with class="sqs-tourdates__item")
performance_cards = driver.find_elements(By.CLASS_NAME, 'sqs-tourdates__item')

# Save the html
for performance_card in performance_cards:
    # create the object for this performance card: keep the raw HTML plus a cleaned-up text version
    raw_text = '\n'.join(line for line in performance_card.text.strip().splitlines() if line.strip())
    performance = Performance(performance_card.get_attribute('outerHTML'), raw_text)
    # append to the list of objects
    performances.append(performance)
    # print the raw text just to make sure we're grabbing the right thing
    #print(performance.raw_text)

# Close the browser
driver.quit()
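
If Safari's WebDriver isn't an option (it requires turning on Allow Remote Automation in Safari's Develop menu), a roughly equivalent setup with headless Chrome might look like the sketch below; the scraping loop above stays the same. This assumes Selenium Manager can find a Chrome install on the machine.

# Hypothetical alternative: drive headless Chrome instead of Safari
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.add_argument("--headless=new")  # run without opening a browser window

driver = webdriver.Chrome(options=chrome_options)
driver.implicitly_wait(10)
driver.get(url)
# ... same performance_cards loop as above ...
driver.quit()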

Break down the html data into the performance object properties

# This will make displaying whatever info we want really easy down the road
# I'm going to use BeautifulSoup for this because I think it's a bit more user-friendly

for performance in performances:
    # get each raw_html into BeautifulSoup
    soup = BeautifulSoup(performance.raw_html, 'html.parser')
    # find the datetime of the performance [0]
    performance.datetime = soup.find('span', class_='sqs-tourdates__timeframe')['data-tour-datetime']
    # find the date of the performance [1]
    performance.date = datetime.strptime(performance.datetime, "%Y-%m-%dT%H:%M:%S").strftime("%Y-%m-%d")
    # find the venue of the performance [2]
    performance.venue = soup.find('div', class_='sqs-tourdates__venue-name').text.strip().split('@')[0].strip()
    # find the location of the venue
    performance.location = soup.find('a', class_='sqs-tourdates__venue-link').text.strip()
    split_string = performance.location.split(',')
    performance.city = split_string[0].strip()  # Remove leading and trailing whitespace [3]
    performance.state = split_string[1].strip()  # Remove leading and trailing whitespace [4]
    performance.country = split_string[2].strip()  # Remove leading and trailing whitespace [5]
    # find the details (generally the other artists that will be there)
    performance_details = soup.find_all('div', class_='sqs-tourdates__lineup-item')
    performance.details = '' # set up as empty string [6]
    for performance_detail in performance_details:
        performance.details = performance.details + performance_detail.text.strip() + '|'
    performance.details = performance.details[:-1]
    # find the link to tickets
    try:
        performance.ticket_link = soup.find('a', class_='sqs-editable-button sqs-button-element--primary sqs-tourdates__button', text=lambda s: "Tickets" in s)['href'].strip()
    except:
        performance.ticket_link = ''
    # find the link to RSVP
    try:
        performance.rsvp_link = soup.find('a', class_='sqs-editable-button sqs-button-element--primary sqs-tourdates__button', text=lambda s: "RSVP" in s)['href'].strip()
    except:
        performance.rsvp_link = ''
    # find the link to Presale; this often doesn't exist, so we just leave it as an empty string
    try:
        performance.presale_link = soup.find('a', class_='sqs-editable-button sqs-button-element--primary sqs-tourdates__button', text=lambda s: "Presale" in s)['href'].strip()
    except:
        performance.presale_link = ''
    performance.latitude = None # [7]
    performance.longitude = None # [8]

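Before moving on, it's worth printing the fields parsed from the first card to confirm the selectors still match the site's markup. A minimal sanity check, using only the attributes set above:

# Sanity check: show what got parsed out of the first performance card
if performances:
    first = performances[0]
    print(first.date, '|', first.venue, '|', f"{first.city}, {first.state}, {first.country}")
    print('Lineup:', first.details or '(none listed)')
    print('Tickets:', first.ticket_link or '(no link)')
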
For the performances scraped from the website, look up map coordinates

geolocator = Nominatim(user_agent="[!!INSERT YOUR EMAIL HERE!!]")

for performance in performances:
    if performance.latitude is None or performance.longitude is None:
        coordinates_query = f"{performance.city}, {performance.state}, {performance.country}"
        coordinates = geolocator.geocode(coordinates_query)
        performance.latitude = coordinates.latitude + random.uniform(-0.01, 0.01) # so pins don't fall directly on top of one another
        performance.longitude = coordinates.longitude + random.uniform(-0.01, 0.01)
    if datetime.strptime(performance.date, "%Y-%m-%d") > datetime.today():
        performance.color = "blue"
    else:
        performance.color = "lightgray"
    #print(performance.color)
    print(f"Coordinates for {performance.city}: Latitude = {performance.latitude}, Longitude = {performance.longitude}")
Coordinates for Alpharetta: Latitude = 34.06756708540253, Longitude = -84.27134967358556
Coordinates for Jacksonville: Latitude = 30.336827401223776, Longitude = -81.65069630244315
Coordinates for Nashville: Latitude = 36.171365557542956, Longitude = -86.77342730802815
Coordinates for Myrtle Beach: Latitude = 33.688202266164595, Longitude = -78.88935246405087
Coordinates for Nashville: Latitude = 36.166439181403994, Longitude = -86.77076419984124
Coordinates for Bend: Latitude = 44.05517213756802, Longitude = -121.31255800866063
Coordinates for Auburn: Latitude = 47.30599077094418, Longitude = -122.23204990799502
Coordinates for Nampa: Latitude = 43.56767959453048, Longitude = -116.56843349196099
Coordinates for Harrisburg: Latitude = 40.27481225049882, Longitude = -76.88348970555096
Coordinates for Camden: Latitude = 39.95080291470532, Longitude = -75.11150383353399
Coordinates for Bristow: Latitude = 38.715987892413914, Longitude = -77.5458948872787
Coordinates for Virginia Beach: Latitude = 36.84369763754082, Longitude = -75.96918500473055
Coordinates for Dewey Beach: Latitude = 38.686384804945526, Longitude = -75.07618841824008
Coordinates for Uncasville: Latitude = 41.435135368632295, Longitude = -72.11542203863725
Coordinates for Cuyahoga Falls: Latitude = 41.144585744447234, Longitude = -81.48261838525265
Coordinates for Burgettstown: Latitude = 40.38413642485206, Longitude = -80.39585314127807
Coordinates for Jacksonville: Latitude = 30.341086687394412, Longitude = -81.66501074172102
Coordinates for Charleston: Latitude = 32.79493092809718, Longitude = -79.94108099785572
Coordinates for Raleigh: Latitude = 35.779116716251714, Longitude = -78.64395996790658
Coordinates for Albuquerque: Latitude = 35.08275197749372, Longitude = -106.64680695747239
Coordinates for Greenwood Village: Latitude = 39.62678817079165, Longitude = -104.94861537209026
Coordinates for West Valley City: Latitude = 40.69711844862041, Longitude = -111.98878018951689
Coordinates for Bonner: Latitude = 46.87756085529895, Longitude = -113.85686723780591
Coordinates for San Diego: Latitude = 32.717605213761786, Longitude = -117.16783382916417
Coordinates for Los Angeles: Latitude = 34.049765203470116, Longitude = -118.24991614673777
Coordinates for San Bernardino: Latitude = 34.818532867760425, Longitude = -116.09207286347338
Coordinates for Montréal: Latitude = 45.4971035229452, Longitude = -73.5795452340849
Coordinates for Oklahoma City: Latitude = 35.463254203851506, Longitude = -97.52001535736535
Coordinates for Des Moines: Latitude = 41.59206953341399, Longitude = -93.61590442137607
Coordinates for Allentown: Latitude = 40.60201023720133, Longitude = -75.47616043037634
Coordinates for Somerset: Latitude = 45.13164433194328, Longitude = -92.66519662357882
Coordinates for Peoria: Latitude = 40.695042742034595, Longitude = -89.5897747086285
Coordinates for Noblesville: Latitude = 40.05019319494188, Longitude = -86.01711577914554
Coordinates for West Palm Beach: Latitude = 26.707647399562045, Longitude = -80.06327284521662
Coordinates for Tampa: Latitude = 27.9514396314976, Longitude = -82.45432720069637
Coordinates for Sacramento: Latitude = 38.57320891596573, Longitude = -121.49400703864724
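
Nominatim's usage policy asks for roughly one request per second, and geocode() returns None when a lookup fails, which would crash the loop above at coordinates.latitude. For a longer list of shows, a safer pattern is to wrap the geocoder in geopy's RateLimiter and guard against missing results; a sketch under those assumptions:

# Sketch: throttled geocoding with a guard for failed lookups
from geopy.extra.rate_limiter import RateLimiter

geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)  # at most ~1 request per second

for performance in performances:
    if performance.latitude is None or performance.longitude is None:
        result = geocode(f"{performance.city}, {performance.state}, {performance.country}")
        if result is None:
            print(f"Could not geocode {performance.city}; leaving coordinates empty")
            continue
        # small random offset so pins don't stack exactly on top of one another
        performance.latitude = result.latitude + random.uniform(-0.01, 0.01)
        performance.longitude = result.longitude + random.uniform(-0.01, 0.01)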

Import the existing CSV of performances, if it exists; the next step turns any rows that aren't already on the website into objects and adds them to the performances list

# Initialize an empty list to store the data
csv_data = []

# Check if the file exists
if os.path.exists(file_name):
    # Open the file in read mode
    with open(file_name, 'r') as csv_file:
        # Create a CSV reader object
        csv_reader = csv.reader(csv_file)

        # Read each row from the CSV file and append it to the data list
        for row in csv_reader:
            csv_data.append(row)

    print("Data imported successfully:")
    #for row in csv_data:
    #    print(row)
else:
    print(f"The file {file_name} does not exist.")
Data imported successfully:
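
Since pandas is already imported, the same file could also be loaded in one call; this is just an alternative sketch, not what the cell above uses. header=None because the CSV is written without a header row, and dtype=str plus keep_default_na=False keep every column as plain text so the rows look the same as the csv.reader output.

# Alternative sketch: load the saved performances with pandas instead of the csv module
if os.path.exists(file_name):
    csv_data = pd.read_csv(file_name, header=None, dtype=str, keep_default_na=False).values.tolist()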

Compare the CSV rows against the performances we just scraped, and keep any shows that are no longer listed on the website

if len(csv_data) == 0:
    print("no data from csv")
else:
    for row in csv_data:
        match_found = False # initialize
        for performance in performances:
            if row[1] == performance.date:
                if row[3] == performance.city:
                    if row[2] == performance.venue:
                        print('Match found: ' + row[1] + ', ' + row[3] + ', ' + row[2])
                        match_found = True
                        break # if we find a match, we skip this performance and move on to the next row in csv_data without adding
        if not match_found:
            print('No match found.')
            performance = Performance('','') # only reached if this CSV row isn't already on the website
            performance.datetime = row[0]
            performance.date = row[1]
            performance.venue = row[2]
            # performance.location = row[]
            performance.city = row[3]
            performance.state = row[4]
            performance.country = row[5]
            performance.details = row[6]
            # performance.ticket_link
            # performance.rsvp_link
            # performance.presale_link
            performance.latitude = float(row[7]) if row[7] else None  # stored as text in the CSV, so convert back to numbers
            performance.longitude = float(row[8]) if row[8] else None
            if datetime.strptime(performance.date, "%Y-%m-%d") > datetime.today():  # same convention as above: upcoming shows in blue
                performance.color = "blue"
            else:
                performance.color = "lightgray"
            performances.append(performance) # append to performances
            print(performance.city + ', ' + performance.state)
Match found: 2024-05-31, Alpharetta, Ameris Bank Amphitheatre
Match found: 2024-06-01, Jacksonville, Daily's Place
Match found: 2024-06-06, Nashville, CMA Music Festival 2024
Match found: 2024-06-07, Myrtle Beach, Carolina Country Music Festival 2024
Match found: 2024-06-08, Nashville, Tin Roof Broadway
Match found: 2024-06-28, Bend, Hayden Homes Amphitheater
Match found: 2024-06-29, Auburn, White River Amphitheatre
Match found: 2024-06-30, Nampa, Ford Idaho Center Amphitheater
Match found: 2024-07-10, Harrisburg, XL Live
Match found: 2024-07-11, Camden, Freedom Mortgage Pavilion
Match found: 2024-07-12, Bristow, Jiffy Lube Live
Match found: 2024-07-13, Virginia Beach, Veterans United Home Loans Amphitheater at Virginia Beach
Match found: 2024-07-14, Dewey Beach, Bottle & Cork
Match found: 2024-07-18, Uncasville, Mohegan Sun Arena
Match found: 2024-07-19, Cuyahoga Falls, Blossom Music Center
Match found: 2024-07-20, Burgettstown, The Pavilion at Star Lake
Match found: 2024-07-25, Jacksonville, Daily's Place
Match found: 2024-07-26, Charleston, Credit One Stadium
Match found: 2024-07-27, Raleigh, Coastal Credit Union Music Park At Walnut Creek
Match found: 2024-08-01, Albuquerque, Isleta Amphitheater
Match found: 2024-08-02, Greenwood Village, Fiddler's Green Amphitheatre
Match found: 2024-08-03, West Valley City, Utah First Credit Union Amphitheatre
Match found: 2024-08-04, Bonner, KettleHouse Amphitheater
Match found: 2024-08-08, San Diego, North Island Credit Union Amphitheatre
Match found: 2024-08-09, Los Angeles, The Kia Forum
Match found: 2024-08-10, San Bernardino, Glen Helen Amphitheater
Match found: 2024-08-17, Montréal, Lasso  2024
Match found: 2024-08-22, Oklahoma City, Paycom Center
Match found: 2024-08-24, Des Moines, Wells Fargo Arena
Match found: 2024-08-30, Allentown, Allentown Fairgrounds
Match found: 2024-09-05, Somerset, Somerset Amphitheater
Match found: 2024-09-06, Peoria, Crusens Farmington Road
Match found: 2024-09-07, Noblesville, Ruoff Music Center
Match found: 2024-09-12, West Palm Beach, iTHINK Financial Amphitheatre
Match found: 2024-09-14, Tampa, MIDFLORIDA Credit Union Amphitheatre
Match found: 2024-10-19, Sacramento, GoldenSky Festival 2024

Put the performances in order

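With the scraped shows and the CSV-only shows combined into one list, a simple sort on the date string keeps the markers and the saved CSV in chronological order (a minimal sketch; the YYYY-MM-DD format sorts correctly as plain text):

# Sort the combined list of performances chronologically
performances.sort(key=lambda performance: performance.date)
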
Do the mapping and save to html file

# Get the directory where the Python script or notebook is located
current_dir = os.path.dirname(os.path.abspath('pythonScrapeGeorgeBirge.ipynb'))

# Set the current working directory to the directory of the Python script or notebook
os.chdir(current_dir)

# Create a map centered at the geographical center of the US
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)

# Add markers for each location
for performance in performances:
    folium.Marker(
        location=[performance.latitude, performance.longitude],
        tooltip=(performance.venue + '<br>' +performance.city + ', ' + performance.state + '<br>' + performance.date),
        icon=folium.Icon(color=performance.color)
    ).add_to(m)

# Specify the path to save the HTML file
html_file_path = os.path.join(current_dir, 'map_GB.html')

# Save the map to an HTML file in the current directory
m.save(html_file_path)

print(f"Map saved to: {html_file_path}")

m
Map saved to: /Users/palmerjones/Website/Projects/map_GB.html

Save to CSV

# Save locations to csv

# Open the file in write mode with newline='' to prevent extra blank lines
with open(file_name, 'w', newline='') as csv_file:
    # Create a CSV writer object
    csv_writer = csv.writer(csv_file)
    
    # Write the header row (left commented out so the import step above can keep indexing rows by position)
    #csv_writer.writerow(["DateTime", "Date", "Venue", "City", "State", "Country", "Details", "Latitude", "Longitude"])
    
    # Write the data rows
    for performance in performances:
        csv_writer.writerow([
            performance.datetime, performance.date, performance.venue,
            performance.city, performance.state, performance.country,
            performance.details, performance.latitude, performance.longitude
        ])

print(f"Data saved to {file_name}")
Data saved to performances_GB.csv
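
As a last sanity check, reading the file back and counting rows confirms that every performance made it to disk; a small sketch:

# Sanity check: count the rows we just wrote
with open(file_name, 'r', newline='') as csv_file:
    row_count = sum(1 for _ in csv.reader(csv_file))
print(f"{row_count} performances saved in {file_name}")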