Apr 2023
Wordle SolverInformation theory and fun

I found myself temporarily addicted to Wordle, so wrote a couple of little solvers to explore the way the game worked

#nonsense #code #misc #internal #text #deep-dive #python #algorithms #information-theory

Back in 2021 I became briefly obsessed by wordle, doing the puzzles religiously every day. After a while I tried to figure out an optimal algorithmic solution.

I came up with a couple of different options. This version is a modified MiniMax, with a lookahead model to python script manages to reduce the risk of hitting trap words: like when you get _ight and have no option but to blindly guess might, sight, fight etc.

It solves hard-mode wordle for 99.57% of possible wordle words, and averages 3.62 guesses. It fails on boxer, foyer, gaunt, watch, caste, tight, shame, ratty, found, mound.

It's quite a computationally intensive solution, but it's still more computationally efficient than me.

wordle_recursive_minimax.py
import os
import time

# --- 1. Core Wordle Logic ---

def load_words(filename):
    if not os.path.exists(filename):
        return []
    with open(filename, 'r', encoding='utf-8') as file:
        return [line.strip().lower() for line in file if len(line.strip()) == 5]

def get_feedback(guess, target):
    """The standard Wordle color engine."""
    feedback = ['B'] * 5
    t_chars = list(target)
    g_chars = list(guess)
    # Greens
    for i in range(5):
        if g_chars[i] == t_chars[i]:
            feedback[i] = 'G'
            t_chars[i] = g_chars[i] = None
    # Yellows
    for i in range(5):
        if g_chars[i] is not None and g_chars[i] in t_chars:
            feedback[i] = 'Y'
            t_chars[t_chars.index(g_chars[i])] = None
    return "".join(feedback)

# --- 2. Advanced 100% Logic (Recursive Lookahead) ---

def get_buckets(guess, pool):
    """Groups a pool of words by the feedback pattern they generate."""
    buckets = {}
    for target in pool:
        pattern = get_feedback(guess, target)
        if pattern not in buckets:
            buckets[pattern] = []
        buckets[pattern].append(target)
    return buckets

def is_guaranteed_solve(guess, pool, turns_left):
    """
    Recursive check: Can this guess solve EVERY word in the pool 
    within the remaining turns?
    """
    if turns_left == 1:
        return len(pool) == 1 and pool[0] == guess
    
    buckets = get_buckets(guess, pool)
    
    # For every possible color pattern we might get back...
    for pattern, sub_pool in buckets.items():
        if pattern == "GGGGG":
            continue
        
        # We must find at least ONE word in the next turn that 
        # can handle this sub-pool.
        success = False
        # In Hard Mode, we can only guess from the sub_pool itself
        for next_guess in sub_pool:
            if is_guaranteed_solve(next_guess, sub_pool, turns_left - 1):
                success = True
                break
        
        if not success:
            return False
            
    return True

def get_best_hard_mode_guess(current_pool, attempts_made):
    """
    Decision Engine:
    1. If pool is large, use Minimax (split into smallest buckets).
    2. If pool is small (<12), use Recursive Safety to guarantee 100%.
    """
    turns_left = 6 - attempts_made
    
    if len(current_pool) <= 2:
        return current_pool[0]

    # --- Step A: Safety First (For Trap Words) ---
    # If the pool is small, check if any word GUARANTEES a win.
    if len(current_pool) < 12:
        for guess in current_pool:
            if is_guaranteed_solve(guess, current_pool, turns_left):
                return guess

    # --- Step B: Minimax (For General Efficiency) ---
    best_word = ""
    min_worst_case = float('inf')

    for guess in current_pool:
        buckets = get_buckets(guess, current_pool)
        worst_case = max(len(b) for b in buckets.values())
        
        # Tie-breaker: If worst-cases are equal, pick the word that 
        # creates the MOST total buckets (Shannon Entropy concept).
        if worst_case < min_worst_case:
            min_worst_case = worst_case
            best_word = guess
        elif worst_case == min_worst_case:
            if len(buckets) > len(get_buckets(best_word, current_pool)):
                best_word = guess
                
    return best_word

# --- 3. The 100% Validation Tester ---

def run_final_test():
    targets = load_words('possible_targets.txt')
    if not targets:
        print("Error: possible_targets.txt not found.")
        return

    print("-" * 45)
    print(" WORDLE HARD MODE: 100% RECURSIVE VALIDATION ")
    print("-" * 45)

    # SALET is the mathematically proven best opener for Hard Mode
    first_guess = "salet"
    print(f"Opening word: {first_guess.upper()}\n")

    stats = {i: 0 for i in range(1, 8)} 
    failures = []
    start_time = time.time()

    for i, secret_word in enumerate(targets):
        current_pool = targets.copy()
        attempts = 1
        
        while attempts <= 6:
            if attempts == 1:
                guess = first_guess
            else:
                guess = get_best_hard_mode_guess(current_pool, attempts - 1)
            
            pattern = get_feedback(guess, secret_word)
            if pattern == "GGGGG":
                stats[attempts] += 1
                break
            
            # Hard Mode filtering
            current_pool = [w for w in current_pool if get_feedback(guess, w) == pattern]
            attempts += 1
        else:
            stats[7] += 1
            failures.append(secret_word)
            print(f"FAILED: {secret_word.upper()}")

        if (i + 1) % 50 == 0:
            rate = ((i + 1 - stats[7]) / (i + 1)) * 100
            print(f"Checked {i+1}/{len(targets)}... Current Win Rate: {rate:.2f}%")

    # --- Final Report ---
    total = len(targets)
    duration = time.time() - start_time
    print("\n" + " FINAL PERFORMANCE SUMMARY ".center(45, "="))
    print(f"Win Rate:           {((total - stats[7])/total)*100:.2f}%")
    print(f"Average Guesses:    {sum(k*stats[k] for k in range(1,7))/(total-stats[7]):.3f}")
    print(f"Total Time:         {duration:.1f}s")
    print(f"Failures:           {', '.join(failures) if failures else 'None'}")
    print("=" * 45)

if __name__ == "__main__":
    run_final_test()

The second version is a modified Markov model - I wanted to try to incorporate statistical features of letter order in English words to adjust a probability estimate for greater accuracy to beat the final 10 words. It performs with significantly lower accuracy than the simple minimax lookahead.

wordle_hybrid_markov.py
import os
import time

# --- 1. File Loading ---
def load_words(filename):
    if not os.path.exists(filename):
        print(f"Error: Could not find '{filename}'.")
        return []
    with open(filename, 'r', encoding='utf-8') as file:
        return [line.strip().lower() for line in file if len(line.strip()) == 5]

# --- 2. Positional Splitting (Minimax) ---
def build_split_matrix(possible_targets):
    total_targets = len(possible_targets)
    split_matrix = [{chr(i): 0 for i in range(97, 123)} for _ in range(5)]
    
    for pos in range(5):
        for char_code in range(97, 123):
            char = chr(char_code)
            green, yellow, gray = 0, 0, 0
            for target in possible_targets:
                if target[pos] == char:
                    green += 1
                elif char in target:
                    yellow += 1
                else:
                    gray += 1
            worst_case_bucket = max(green, yellow, gray)
            split_matrix[pos][char] = total_targets - worst_case_bucket
    return split_matrix

# --- 3. Markov Flow (Bigrams) ---
def build_markov_matrix(possible_targets):
    markov_matrix = [{chr(a): {chr(b): 0 for b in range(97, 123)} 
                      for a in range(97, 123)} for _ in range(4)]
    for word in possible_targets:
        for i in range(4):
            char1 = word[i]
            char2 = word[i+1]
            markov_matrix[i][char1][char2] += 1
    return markov_matrix

# --- 4. Hybrid Scoring & Decision Engine ---
def score_guess_hybrid(guess, split_matrix, markov_matrix, markov_weight):
    score = 0
    seen_chars = set()
    
    for pos, char in enumerate(guess):
        score += split_matrix[pos][char]
        if char in seen_chars:
            score -= (split_matrix[pos][char] * 0.5)
        seen_chars.add(char)
        
    markov_bonus = 0
    for i in range(4):
        char1 = guess[i]
        char2 = guess[i+1]
        markov_bonus += markov_matrix[i][char1][char2]
        
    return score + (markov_bonus * markov_weight)

def get_best_hybrid_guess(valid_guesses, possible_targets, markov_weight):
    if len(possible_targets) == 1:
        return possible_targets[0]

    split_matrix = build_split_matrix(possible_targets) 
    markov_matrix = build_markov_matrix(possible_targets)
    
    best_guess = ""
    max_score = -1

    for guess in valid_guesses:
        score = score_guess_hybrid(guess, split_matrix, markov_matrix, markov_weight)
        if score > max_score or (score == max_score and guess in possible_targets):
            max_score = score
            best_guess = guess

    return best_guess

# --- 5. Game Engine Rules ---
def generate_feedback(guess, target):
    feedback = ['B'] * 5
    target_chars = list(target)
    
    for i in range(5):
        if guess[i] == target[i]:
            feedback[i] = 'G'
            target_chars[i] = None
            
    for i in range(5):
        if feedback[i] == 'B' and guess[i] in target_chars:
            feedback[i] = 'Y'
            target_chars[target_chars.index(guess[i])] = None
            
    return "".join(feedback)

def filter_targets(guess, feedback, possible_targets):
    filtered_list = []
    for target in possible_targets:
        match = True
        target_chars = list(target)
        guess_chars = list(guess)
        
        for i in range(5):
            if feedback[i] == 'G':
                if target_chars[i] != guess_chars[i]:
                    match = False
                    break
                target_chars[i] = None
                guess_chars[i] = None
                
        if not match: continue
            
        for i in range(5):
            if guess_chars[i] is None: continue
            letter = guess_chars[i]
            if feedback[i] == 'Y':
                if target[i] == letter or letter not in target_chars:
                    match = False
                    break
                target_chars[target_chars.index(letter)] = None 
            elif feedback[i] == 'B':
                if letter in target_chars:
                    match = False
                    break

        if match:
            filtered_list.append(target)
            
    return filtered_list

# --- 6. The Simulation Loop (Sequential + Caching) ---
def simulate_single_game(target_word, valid_guesses, all_targets, weight, cached_first_guess):
    possible_targets = all_targets.copy()
    attempts = 1
    
    while attempts <= 6:
        # Use the pre-calculated guess on turn 1 to save massive compute time
        if attempts == 1:
            guess = cached_first_guess
        else:
            guess = get_best_hybrid_guess(valid_guesses, possible_targets, weight)
            
        if guess == target_word:
            return attempts
            
        feedback = generate_feedback(guess, target_word)
        possible_targets = filter_targets(guess, feedback, possible_targets)
        
        if not possible_targets:
            return 7 
        attempts += 1
        
    return 7 

# --- 7. The Sequential Execution Manager ---
def run_simulation(valid_guesses, all_targets):
    weights_to_test = [0.0, 0.5, 1.0, 1.5, 2.0]
    
    # Testing a subset of words (e.g., 50) so the sequential loop doesn't take hours
    # Change to all_targets if you want to run the full, slow dictionary test.
    test_batch = all_targets
    
    print("=========================================")
    print("SEQUENTIAL HYBRID ALGORITHM TUNING LAB")
    print(f"Testing {len(test_batch)} words across {len(weights_to_test)} configurations.")
    print("=========================================\n")
    
    for weight in weights_to_test:
        start_time = time.time()
        
        print(f"--- Testing Markov Weight: {weight} ---")
        print("Pre-calculating optimal first guess...")
        
        # Calculate the opening guess ONCE for this weight
        first_guess = get_best_hybrid_guess(valid_guesses, all_targets, weight)
        print(f"Optimal opening word for weight {weight} is: {first_guess.upper()}")
        print("Running sequential simulation...")
        
        results = []
        for target in test_batch:
            score = simulate_single_game(target, valid_guesses, all_targets, weight, first_guess)
            results.append(score)
        
        # Tally the results
        losses = sum(1 for score in results if score > 6)
        total_guesses = sum(score if score <= 6 else 7 for score in results)
        
        average_score = total_guesses / len(test_batch)
        elapsed_time = time.time() - start_time
        win_rate = ((len(test_batch) - losses) / len(test_batch)) * 100
        
        print(f"Average Guesses: {average_score:.3f}")
        print(f"Win Rate:        {win_rate:.1f}%")
        print(f"Time Taken:      {elapsed_time:.1f} seconds\n")

if __name__ == "__main__":
    targets = load_words('possible_targets.txt')
    guesses = load_words('valid_guesses.txt')
    
    if targets and guesses:
        guesses = list(set(guesses + targets))
        run_simulation(guesses, targets)
    else:
        print("Missing dictionary files. Please create possible_targets.txt and valid_guesses.txt")

I've since accepted boxer, foyer, and gaunt as my bete noires and am satisfied with a 99.57% success rate. Unfortunately a (mostly) reliable machine solution has now taken all the fun out the game and I haven't played wordle since.