Wordle Solver — Information theory and fun
Back in 2021 I became briefly obsessed by wordle, doing the puzzles religiously every day. After a while I tried to figure out an optimal algorithmic solution.
I came up with a couple of different options. This version is a modified MiniMax, with a lookahead model to python script manages to reduce the risk of hitting trap words: like when you get _ight and have no option but to blindly guess might, sight, fight etc.
It solves hard-mode wordle for 99.57% of possible wordle words, and averages 3.62 guesses. It fails on boxer, foyer, gaunt, watch, caste, tight, shame, ratty, found, mound.
It's quite a computationally intensive solution, but it's still more computationally efficient than me.
import os
import time
# --- 1. Core Wordle Logic ---
def load_words(filename):
if not os.path.exists(filename):
return []
with open(filename, 'r', encoding='utf-8') as file:
return [line.strip().lower() for line in file if len(line.strip()) == 5]
def get_feedback(guess, target):
"""The standard Wordle color engine."""
feedback = ['B'] * 5
t_chars = list(target)
g_chars = list(guess)
# Greens
for i in range(5):
if g_chars[i] == t_chars[i]:
feedback[i] = 'G'
t_chars[i] = g_chars[i] = None
# Yellows
for i in range(5):
if g_chars[i] is not None and g_chars[i] in t_chars:
feedback[i] = 'Y'
t_chars[t_chars.index(g_chars[i])] = None
return "".join(feedback)
# --- 2. Advanced 100% Logic (Recursive Lookahead) ---
def get_buckets(guess, pool):
"""Groups a pool of words by the feedback pattern they generate."""
buckets = {}
for target in pool:
pattern = get_feedback(guess, target)
if pattern not in buckets:
buckets[pattern] = []
buckets[pattern].append(target)
return buckets
def is_guaranteed_solve(guess, pool, turns_left):
"""
Recursive check: Can this guess solve EVERY word in the pool
within the remaining turns?
"""
if turns_left == 1:
return len(pool) == 1 and pool[0] == guess
buckets = get_buckets(guess, pool)
# For every possible color pattern we might get back...
for pattern, sub_pool in buckets.items():
if pattern == "GGGGG":
continue
# We must find at least ONE word in the next turn that
# can handle this sub-pool.
success = False
# In Hard Mode, we can only guess from the sub_pool itself
for next_guess in sub_pool:
if is_guaranteed_solve(next_guess, sub_pool, turns_left - 1):
success = True
break
if not success:
return False
return True
def get_best_hard_mode_guess(current_pool, attempts_made):
"""
Decision Engine:
1. If pool is large, use Minimax (split into smallest buckets).
2. If pool is small (<12), use Recursive Safety to guarantee 100%.
"""
turns_left = 6 - attempts_made
if len(current_pool) <= 2:
return current_pool[0]
# --- Step A: Safety First (For Trap Words) ---
# If the pool is small, check if any word GUARANTEES a win.
if len(current_pool) < 12:
for guess in current_pool:
if is_guaranteed_solve(guess, current_pool, turns_left):
return guess
# --- Step B: Minimax (For General Efficiency) ---
best_word = ""
min_worst_case = float('inf')
for guess in current_pool:
buckets = get_buckets(guess, current_pool)
worst_case = max(len(b) for b in buckets.values())
# Tie-breaker: If worst-cases are equal, pick the word that
# creates the MOST total buckets (Shannon Entropy concept).
if worst_case < min_worst_case:
min_worst_case = worst_case
best_word = guess
elif worst_case == min_worst_case:
if len(buckets) > len(get_buckets(best_word, current_pool)):
best_word = guess
return best_word
# --- 3. The 100% Validation Tester ---
def run_final_test():
targets = load_words('possible_targets.txt')
if not targets:
print("Error: possible_targets.txt not found.")
return
print("-" * 45)
print(" WORDLE HARD MODE: 100% RECURSIVE VALIDATION ")
print("-" * 45)
# SALET is the mathematically proven best opener for Hard Mode
first_guess = "salet"
print(f"Opening word: {first_guess.upper()}\n")
stats = {i: 0 for i in range(1, 8)}
failures = []
start_time = time.time()
for i, secret_word in enumerate(targets):
current_pool = targets.copy()
attempts = 1
while attempts <= 6:
if attempts == 1:
guess = first_guess
else:
guess = get_best_hard_mode_guess(current_pool, attempts - 1)
pattern = get_feedback(guess, secret_word)
if pattern == "GGGGG":
stats[attempts] += 1
break
# Hard Mode filtering
current_pool = [w for w in current_pool if get_feedback(guess, w) == pattern]
attempts += 1
else:
stats[7] += 1
failures.append(secret_word)
print(f"FAILED: {secret_word.upper()}")
if (i + 1) % 50 == 0:
rate = ((i + 1 - stats[7]) / (i + 1)) * 100
print(f"Checked {i+1}/{len(targets)}... Current Win Rate: {rate:.2f}%")
# --- Final Report ---
total = len(targets)
duration = time.time() - start_time
print("\n" + " FINAL PERFORMANCE SUMMARY ".center(45, "="))
print(f"Win Rate: {((total - stats[7])/total)*100:.2f}%")
print(f"Average Guesses: {sum(k*stats[k] for k in range(1,7))/(total-stats[7]):.3f}")
print(f"Total Time: {duration:.1f}s")
print(f"Failures: {', '.join(failures) if failures else 'None'}")
print("=" * 45)
if __name__ == "__main__":
run_final_test()
The second version is a modified Markov model - I wanted to try to incorporate statistical features of letter order in English words to adjust a probability estimate for greater accuracy to beat the final 10 words. It performs with significantly lower accuracy than the simple minimax lookahead.
import os
import time
# --- 1. File Loading ---
def load_words(filename):
if not os.path.exists(filename):
print(f"Error: Could not find '{filename}'.")
return []
with open(filename, 'r', encoding='utf-8') as file:
return [line.strip().lower() for line in file if len(line.strip()) == 5]
# --- 2. Positional Splitting (Minimax) ---
def build_split_matrix(possible_targets):
total_targets = len(possible_targets)
split_matrix = [{chr(i): 0 for i in range(97, 123)} for _ in range(5)]
for pos in range(5):
for char_code in range(97, 123):
char = chr(char_code)
green, yellow, gray = 0, 0, 0
for target in possible_targets:
if target[pos] == char:
green += 1
elif char in target:
yellow += 1
else:
gray += 1
worst_case_bucket = max(green, yellow, gray)
split_matrix[pos][char] = total_targets - worst_case_bucket
return split_matrix
# --- 3. Markov Flow (Bigrams) ---
def build_markov_matrix(possible_targets):
markov_matrix = [{chr(a): {chr(b): 0 for b in range(97, 123)}
for a in range(97, 123)} for _ in range(4)]
for word in possible_targets:
for i in range(4):
char1 = word[i]
char2 = word[i+1]
markov_matrix[i][char1][char2] += 1
return markov_matrix
# --- 4. Hybrid Scoring & Decision Engine ---
def score_guess_hybrid(guess, split_matrix, markov_matrix, markov_weight):
score = 0
seen_chars = set()
for pos, char in enumerate(guess):
score += split_matrix[pos][char]
if char in seen_chars:
score -= (split_matrix[pos][char] * 0.5)
seen_chars.add(char)
markov_bonus = 0
for i in range(4):
char1 = guess[i]
char2 = guess[i+1]
markov_bonus += markov_matrix[i][char1][char2]
return score + (markov_bonus * markov_weight)
def get_best_hybrid_guess(valid_guesses, possible_targets, markov_weight):
if len(possible_targets) == 1:
return possible_targets[0]
split_matrix = build_split_matrix(possible_targets)
markov_matrix = build_markov_matrix(possible_targets)
best_guess = ""
max_score = -1
for guess in valid_guesses:
score = score_guess_hybrid(guess, split_matrix, markov_matrix, markov_weight)
if score > max_score or (score == max_score and guess in possible_targets):
max_score = score
best_guess = guess
return best_guess
# --- 5. Game Engine Rules ---
def generate_feedback(guess, target):
feedback = ['B'] * 5
target_chars = list(target)
for i in range(5):
if guess[i] == target[i]:
feedback[i] = 'G'
target_chars[i] = None
for i in range(5):
if feedback[i] == 'B' and guess[i] in target_chars:
feedback[i] = 'Y'
target_chars[target_chars.index(guess[i])] = None
return "".join(feedback)
def filter_targets(guess, feedback, possible_targets):
filtered_list = []
for target in possible_targets:
match = True
target_chars = list(target)
guess_chars = list(guess)
for i in range(5):
if feedback[i] == 'G':
if target_chars[i] != guess_chars[i]:
match = False
break
target_chars[i] = None
guess_chars[i] = None
if not match: continue
for i in range(5):
if guess_chars[i] is None: continue
letter = guess_chars[i]
if feedback[i] == 'Y':
if target[i] == letter or letter not in target_chars:
match = False
break
target_chars[target_chars.index(letter)] = None
elif feedback[i] == 'B':
if letter in target_chars:
match = False
break
if match:
filtered_list.append(target)
return filtered_list
# --- 6. The Simulation Loop (Sequential + Caching) ---
def simulate_single_game(target_word, valid_guesses, all_targets, weight, cached_first_guess):
possible_targets = all_targets.copy()
attempts = 1
while attempts <= 6:
# Use the pre-calculated guess on turn 1 to save massive compute time
if attempts == 1:
guess = cached_first_guess
else:
guess = get_best_hybrid_guess(valid_guesses, possible_targets, weight)
if guess == target_word:
return attempts
feedback = generate_feedback(guess, target_word)
possible_targets = filter_targets(guess, feedback, possible_targets)
if not possible_targets:
return 7
attempts += 1
return 7
# --- 7. The Sequential Execution Manager ---
def run_simulation(valid_guesses, all_targets):
weights_to_test = [0.0, 0.5, 1.0, 1.5, 2.0]
# Testing a subset of words (e.g., 50) so the sequential loop doesn't take hours
# Change to all_targets if you want to run the full, slow dictionary test.
test_batch = all_targets
print("=========================================")
print("SEQUENTIAL HYBRID ALGORITHM TUNING LAB")
print(f"Testing {len(test_batch)} words across {len(weights_to_test)} configurations.")
print("=========================================\n")
for weight in weights_to_test:
start_time = time.time()
print(f"--- Testing Markov Weight: {weight} ---")
print("Pre-calculating optimal first guess...")
# Calculate the opening guess ONCE for this weight
first_guess = get_best_hybrid_guess(valid_guesses, all_targets, weight)
print(f"Optimal opening word for weight {weight} is: {first_guess.upper()}")
print("Running sequential simulation...")
results = []
for target in test_batch:
score = simulate_single_game(target, valid_guesses, all_targets, weight, first_guess)
results.append(score)
# Tally the results
losses = sum(1 for score in results if score > 6)
total_guesses = sum(score if score <= 6 else 7 for score in results)
average_score = total_guesses / len(test_batch)
elapsed_time = time.time() - start_time
win_rate = ((len(test_batch) - losses) / len(test_batch)) * 100
print(f"Average Guesses: {average_score:.3f}")
print(f"Win Rate: {win_rate:.1f}%")
print(f"Time Taken: {elapsed_time:.1f} seconds\n")
if __name__ == "__main__":
targets = load_words('possible_targets.txt')
guesses = load_words('valid_guesses.txt')
if targets and guesses:
guesses = list(set(guesses + targets))
run_simulation(guesses, targets)
else:
print("Missing dictionary files. Please create possible_targets.txt and valid_guesses.txt")
I've since accepted boxer, foyer, and gaunt as my bete noires and am satisfied with a 99.57% success rate. Unfortunately a (mostly) reliable machine solution has now taken all the fun out the game and I haven't played wordle since.