Basic BootstrapFewShot Optimization
import dspy
from dspy.teleprompt import BootstrapFewShot
import os
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# Configure DSPy
lm = dspy.LM("openai/gpt-4o-mini", api_key=os.getenv("OPENAI_API_KEY"))
dspy.configure(lm=lm)
class BasicQA(dspy.Module):
    def __init__(self):
        super().__init__()
        self.generate_answer = dspy.Predict("question -> answer")

    def forward(self, question):
        return self.generate_answer(question=question)

# Define exact match metric
def exact_match(example, pred, trace=None):
    """Check if prediction matches expected answer."""
    return example.answer.lower().strip() == pred.answer.lower().strip()

# Training data
trainset = [
    dspy.Example(question="What is 2+2?", answer="4").with_inputs("question"),
    dspy.Example(question="Capital of France?", answer="Paris").with_inputs("question"),
    dspy.Example(question="Who wrote Romeo and Juliet?", answer="Shakespeare").with_inputs("question"),
    dspy.Example(question="What is H2O?", answer="Water").with_inputs("question"),
    dspy.Example(question="How many continents?", answer="7").with_inputs("question"),
]

# Test data
testset = [
    dspy.Example(question="What is 3+3?", answer="6").with_inputs("question"),
    dspy.Example(question="Capital of Spain?", answer="Madrid").with_inputs("question"),
]

def bootstrap_optimize(program, trainset, max_demos=4):
    """Optimize the program using BootstrapFewShot."""
    optimizer = BootstrapFewShot(
        metric=exact_match,
        max_bootstrapped_demos=max_demos,
        max_labeled_demos=2
    )
    return optimizer.compile(program, trainset=trainset)

def evaluate(program, testset):
    """Evaluate program on test set."""
    correct = 0
    for example in testset:
        try:
            pred = program(question=example.question)
            if exact_match(example, pred):
                correct += 1
        except Exception as e:
            print(f"Error: {e}")
    return correct / len(testset) if testset else 0
# Run optimization
baseline = BasicQA()
optimized = bootstrap_optimize(BasicQA(), trainset, max_demos=4)
# Evaluate
baseline_score = evaluate(baseline, testset)
optimized_score = evaluate(optimized, testset)
print(f"Baseline accuracy: {baseline_score:.2%}")
print(f"Optimized accuracy: {optimized_score:.2%}")
print(f"Improvement: {optimized_score - baseline_score:+.2%}")
Key Concepts
The metric function receives example (ground truth) and pred (prediction). Normalize strings with .lower().strip() for robust matching.
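You can sanity-check a metric in isolation before handing it to an optimizer. A minimal sketch calling the exact_match function above directly on a hand-built prediction (the values are illustrative):

example = dspy.Example(question="What is 2+2?", answer="4").with_inputs("question")
pred = dspy.Prediction(answer=" 4 ")  # extra whitespace and casing are normalized away
print(exact_match(example, pred))  # True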
KNNFewShot for Context-Aware Selection
import dspy
from dspy.teleprompt import KNNFewShot
import os
from dotenv import load_dotenv
load_dotenv()
lm = dspy.LM("openai/gpt-4o-mini", api_key=os.getenv("OPENAI_API_KEY"))
dspy.configure(lm=lm)
class TopicClassifier(dspy.Module):
    def __init__(self):
        super().__init__()
        self.classify = dspy.Predict("text -> topic")

    def forward(self, text):
        return self.classify(text=text)
# Training data with diverse topics
trainset = [
    dspy.Example(text="Stock price increased after earnings", topic="finance").with_inputs("text"),
    dspy.Example(text="New study reveals vaccine effectiveness", topic="healthcare").with_inputs("text"),
    dspy.Example(text="Court ruled in favor of plaintiff", topic="legal").with_inputs("text"),
    dspy.Example(text="Quarterback threw touchdown pass", topic="sports").with_inputs("text"),
    dspy.Example(text="New iPhone features improved camera", topic="technology").with_inputs("text"),
    dspy.Example(text="Merger approved by shareholders", topic="finance").with_inputs("text"),
    dspy.Example(text="Patient recovered after surgery", topic="healthcare").with_inputs("text"),
    dspy.Example(text="Jury reached guilty verdict", topic="legal").with_inputs("text"),
]

# Test data
testset = [
    dspy.Example(text="Bitcoin price surged today", topic="finance").with_inputs("text"),
    dspy.Example(text="The team won the championship", topic="sports").with_inputs("text"),
]
def topic_match(example, pred, trace=None):
    """Check if predicted topic matches expected."""
    return example.topic.lower().strip() == pred.topic.lower().strip()

def create_knn_optimizer(k=3):
    """Create KNNFewShot optimizer."""
    # Note: some DSPy releases also expect an embedding function here,
    # e.g. vectorizer=dspy.Embedder("openai/text-embedding-3-small").
    return KNNFewShot(
        k=k,
        trainset=trainset
    )
def evaluate_classifier(classifier, testset):
    """Evaluate classifier accuracy."""
    correct = 0
    for example in testset:
        try:
            pred = classifier(text=example.text)
            if topic_match(example, pred):
                correct += 1
                print(f"✓ '{example.text[:40]}...' -> {pred.topic}")
            else:
                print(f"✗ '{example.text[:40]}...' -> {pred.topic} (expected: {example.topic})")
        except Exception as e:
            print(f"Error: {e}")
    return correct / len(testset) if testset else 0
# Create and compile
optimizer = create_knn_optimizer(k=3)
compiled = optimizer.compile(TopicClassifier(), trainset=trainset)
# Evaluate
print("=== KNNFewShot Results ===")
accuracy = evaluate_classifier(compiled, testset)
print(f"\nAccuracy: {accuracy:.2%}")
Key Concepts
KNNFewShot selects the k most similar examples for each query. For finance questions, it selects finance examples—making the prompts more relevant!
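To build intuition for what "most similar" means here, the sketch below implements the k-nearest-neighbor idea with a toy bag-of-words embedding and cosine similarity. DSPy's KNNFewShot uses a real text embedder internally, so treat this purely as an illustration of the selection step, not its actual implementation; toy_embed, cosine, and knn_demos are hypothetical helpers.

from collections import Counter
import math

def toy_embed(text):
    """Toy 'embedding': a bag-of-words count vector."""
    return Counter(text.lower().split())

def cosine(a, b):
    """Cosine similarity between two sparse count vectors."""
    dot = sum(a[w] * b[w] for w in a)
    norm = math.sqrt(sum(v * v for v in a.values())) * math.sqrt(sum(v * v for v in b.values()))
    return dot / norm if norm else 0.0

def knn_demos(query, examples, k=3):
    """Pick the k training examples whose text is most similar to the query."""
    return sorted(examples, key=lambda ex: cosine(toy_embed(query), toy_embed(ex.text)), reverse=True)[:k]

# With this toy embedding, the finance example sharing the word "price" ranks first.
for demo in knn_demos("Bitcoin price surged today", trainset, k=3):
    print(demo.topic, "-", demo.text)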
MIPRO for Complex Reasoning
import dspy
from dspy.teleprompt import MIPROv2  # MIPROv2 supersedes the original MIPRO in current DSPy releases
import re
import os
from dotenv import load_dotenv

load_dotenv()
lm = dspy.LM("openai/gpt-4o-mini", api_key=os.getenv("OPENAI_API_KEY"))
dspy.configure(lm=lm)
class MathSolver(dspy.Module):
    def __init__(self):
        super().__init__()
        self.solve = dspy.ChainOfThought("problem -> answer")

    def forward(self, problem):
        result = self.solve(problem=problem)
        return dspy.Prediction(
            steps=result.reasoning,  # recent DSPy exposes the rationale as `reasoning` (older releases used `rationale`)
            answer=result.answer
        )
# Training data
trainset = [
    dspy.Example(
        problem="A rope is 12m long. Cut into 3 equal pieces. Length of each?",
        answer="4 meters"
    ).with_inputs("problem"),
    dspy.Example(
        problem="Train travels 60km in 1 hour. How far in 3 hours?",
        answer="180 km"
    ).with_inputs("problem"),
    dspy.Example(
        problem="Box has 8 rows of 5 apples each. Total apples?",
        answer="40 apples"
    ).with_inputs("problem"),
]

# Test data
testset = [
    dspy.Example(
        problem="John saves $200 per month. How much in 6 months?",
        answer="$1,200"
    ).with_inputs("problem"),
]
def extract_number(text):
    """Extract numerical value from text."""
    numbers = re.findall(r'[\d,]+(?:\.\d+)?', str(text))
    if numbers:
        return float(numbers[-1].replace(',', ''))
    return None

def math_metric(example, pred, trace=None):
    """Metric that compares numerical answers."""
    pred_num = extract_number(pred.answer)
    true_num = extract_number(example.answer)
    if pred_num is None or true_num is None:
        # Fall back to string comparison
        return example.answer.lower() in pred.answer.lower()
    # Allow small numerical tolerance
    return abs(pred_num - true_num) < 0.01
def mipro_optimize(program, trainset, num_candidates=10):
    """Optimize using MIPRO (the MIPROv2 class)."""
    optimizer = MIPROv2(
        metric=math_metric,
        auto=None,  # with auto disabled, num_candidates/num_trials set the search budget explicitly
        num_candidates=num_candidates,
        init_temperature=0.7,
        verbose=True
    )
    return optimizer.compile(
        program,
        trainset=trainset,
        num_trials=3,
        max_bootstrapped_demos=4
    )
# Optimize
print("=== MIPRO Optimization ===")
optimized = mipro_optimize(MathSolver(), trainset)
# Test
print("\n=== Testing ===")
for example in testset:
    result = optimized(problem=example.problem)
    print(f"Problem: {example.problem}")
    print(f"Reasoning: {result.steps}")
    print(f"Answer: {result.answer}")
    print(f"Expected: {example.answer}")
    print(f"Correct: {math_metric(example, result)}")
Key Concepts
MIPRO optimizes both instructions and demonstrations. The extract_number helper enables robust numerical comparison even when formats differ.
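For instance, the helper reduces differently formatted strings to the same number, so the model's phrasing does not decide correctness (inputs below are illustrative):

for text in ["$1,200", "1200 dollars", "about 1,200.00"]:
    print(text, "->", extract_number(text))  # each prints 1200.0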
Optimizer Comparison Benchmark
import dspy
from dspy.teleprompt import BootstrapFewShot, KNNFewShot, MIPROv2  # MIPROv2 replaces the older MIPRO
import time
import os
from dotenv import load_dotenv
load_dotenv()
lm = dspy.LM("openai/gpt-4o-mini", api_key=os.getenv("OPENAI_API_KEY"))
dspy.configure(lm=lm)
class SentimentAnalyzer(dspy.Module):
    def __init__(self):
        super().__init__()
        self.analyze = dspy.Predict("text -> sentiment")

    def forward(self, text):
        return self.analyze(text=text)

trainset = [
    dspy.Example(text="I love this product!", sentiment="positive").with_inputs("text"),
    dspy.Example(text="Terrible quality.", sentiment="negative").with_inputs("text"),
    dspy.Example(text="Works as expected.", sentiment="neutral").with_inputs("text"),
    dspy.Example(text="Outstanding service!", sentiment="positive").with_inputs("text"),
    dspy.Example(text="Would not recommend.", sentiment="negative").with_inputs("text"),
    dspy.Example(text="It's okay I guess.", sentiment="neutral").with_inputs("text"),
]

testset = [
    dspy.Example(text="Best purchase ever!", sentiment="positive").with_inputs("text"),
    dspy.Example(text="Complete waste of money.", sentiment="negative").with_inputs("text"),
    dspy.Example(text="Does the job.", sentiment="neutral").with_inputs("text"),
]

def sentiment_match(example, pred, trace=None):
    return example.sentiment.lower() == pred.sentiment.lower()

def evaluate(program, testset):
    correct = sum(1 for ex in testset
                  if sentiment_match(ex, program(text=ex.text)))
    return correct / len(testset)
def benchmark_optimizers(program_class, trainset, testset):
    """Compare multiple optimizers."""
    results = {}

    # Baseline
    print("Testing Baseline...")
    baseline = program_class()
    results["Baseline"] = {
        "accuracy": evaluate(baseline, testset),
        "time": 0
    }

    # BootstrapFewShot
    print("Testing BootstrapFewShot...")
    start = time.time()
    optimizer = BootstrapFewShot(metric=sentiment_match, max_bootstrapped_demos=4)
    compiled = optimizer.compile(program_class(), trainset=trainset)
    results["BootstrapFewShot"] = {
        "accuracy": evaluate(compiled, testset),
        "time": time.time() - start
    }

    # KNNFewShot
    print("Testing KNNFewShot...")
    start = time.time()
    optimizer = KNNFewShot(k=3, trainset=trainset)
    compiled = optimizer.compile(program_class(), trainset=trainset)
    results["KNNFewShot"] = {
        "accuracy": evaluate(compiled, testset),
        "time": time.time() - start
    }

    # MIPRO (auto="light" sets a small trial budget itself, so num_trials is not passed to compile)
    print("Testing MIPRO...")
    start = time.time()
    optimizer = MIPROv2(metric=sentiment_match, auto="light")
    compiled = optimizer.compile(program_class(), trainset=trainset)
    results["MIPRO"] = {
        "accuracy": evaluate(compiled, testset),
        "time": time.time() - start
    }

    return results
def print_comparison(results):
    """Print comparison report."""
    print("\n" + "=" * 50)
    print("OPTIMIZER COMPARISON REPORT")
    print("=" * 50)

    baseline_acc = results["Baseline"]["accuracy"]
    for name, data in results.items():
        print(f"\n{name}:")
        print(f"  Accuracy: {data['accuracy']:.1%}")
        print(f"  Time: {data['time']:.1f}s")
        if name != "Baseline":
            improvement = data['accuracy'] - baseline_acc
            print(f"  Improvement: {improvement:+.1%}")

    # Best performance
    best = max(results.items(), key=lambda x: x[1]['accuracy'])
    print(f"\n🏆 Best Performance: {best[0]}")
# Run benchmark
results = benchmark_optimizers(SentimentAnalyzer, trainset, testset)
print_comparison(results)
Key Concepts
This benchmark tracks both accuracy and compilation time. Note how MIPRO takes longer but often achieves the best results.
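Because compilation time is a one-time cost, the usual pattern is to save the winning compiled program and reload it at serving time instead of recompiling. A minimal sketch, assuming compiled holds a program returned by one of the optimizers' compile() calls (the file name is illustrative):

# Persist the optimized prompts/demos to disk.
compiled.save("sentiment_optimized.json")

# Later, rebuild the module and load the optimized state back in.
program = SentimentAnalyzer()
program.load("sentiment_optimized.json")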
Progressive Optimization Pipeline
import dspy
from dspy.teleprompt import BootstrapFewShot, KNNFewShot, MIPROv2  # MIPROv2 replaces the older MIPRO
import time
import os
from dotenv import load_dotenv
load_dotenv()
lm = dspy.LM("openai/gpt-4o-mini", api_key=os.getenv("OPENAI_API_KEY"))
dspy.configure(lm=lm)
def evaluate(program, valset, metric):
    """Evaluate program on validation set."""
    correct = sum(1 for ex in valset if metric(ex, program(**ex.inputs())))
    return correct / len(valset)
def progressive_optimize(program_class, trainset, valset, metric, target_accuracy=0.85):
    """
    Progressively optimize until target accuracy is reached.
    """
    stages = [
        {
            "name": "BootstrapFewShot",
            "optimizer": BootstrapFewShot(metric=metric, max_bootstrapped_demos=4),
            "config": {}
        },
        {
            "name": "KNNFewShot",
            "optimizer": KNNFewShot(k=3, trainset=trainset),
            "config": {}
        },
        {
            "name": "MIPRO",
            # auto="medium" chooses the trial budget itself, so no extra compile config is passed here.
            "optimizer": MIPROv2(metric=metric, auto="medium"),
            "config": {}
        }
    ]

    results = []
    best_program = None
    best_accuracy = 0

    # Baseline
    baseline = program_class()
    baseline_acc = evaluate(baseline, valset, metric)
    results.append({
        "stage": "Baseline",
        "accuracy": baseline_acc,
        "time": 0
    })
    print(f"Baseline: {baseline_acc:.1%}")

    for stage in stages:
        print(f"\nTrying {stage['name']}...")
        start = time.time()
        try:
            compiled = stage['optimizer'].compile(
                program_class(),
                trainset=trainset,
                **stage['config']
            )
            accuracy = evaluate(compiled, valset, metric)
            elapsed = time.time() - start
            results.append({
                "stage": stage['name'],
                "accuracy": accuracy,
                "time": elapsed
            })
            print(f"  Accuracy: {accuracy:.1%} (took {elapsed:.1f}s)")

            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_program = compiled
                print("  ✓ New best!")

            # Early stopping if target reached
            if accuracy >= target_accuracy:
                print(f"\n🎯 Target accuracy {target_accuracy:.1%} reached!")
                break
        except Exception as e:
            print(f"  Error: {e}")

    return best_program, results
def analyze_optimization_roi(results):
    """Analyze return on investment."""
    print("\n" + "=" * 50)
    print("ROI ANALYSIS")
    print("=" * 50)

    baseline_acc = results[0]['accuracy']
    for i, result in enumerate(results[1:], 1):
        gain = result['accuracy'] - baseline_acc
        time_spent = result['time']
        if time_spent > 0:
            roi = gain / (time_spent / 60)  # Gain per minute
            print(f"\n{result['stage']}:")
            print(f"  Accuracy gain: {gain:+.1%}")
            print(f"  Time spent: {time_spent:.1f}s")
            print(f"  ROI: {roi:.2%} per minute")
# Example usage
class SimpleQA(dspy.Module):
    def __init__(self):
        super().__init__()
        self.answer = dspy.Predict("question -> answer")

    def forward(self, question):
        return self.answer(question=question)
trainset = [
    dspy.Example(question="What is 2+2?", answer="4").with_inputs("question"),
    dspy.Example(question="Capital of France?", answer="Paris").with_inputs("question"),
    # ... more examples
]

valset = [
    dspy.Example(question="What is 5+5?", answer="10").with_inputs("question"),
]

def metric(ex, pred, trace=None):
    return ex.answer.lower() in pred.answer.lower()
# Run progressive optimization
best, results = progressive_optimize(
SimpleQA, trainset, valset, metric, target_accuracy=0.80
)
# Analyze ROI
analyze_optimization_roi(results)
print("\n✅ Optimization complete!")
Key Concepts
Progressive optimization stops early when target is reached—saving time. ROI analysis helps identify which optimizer gives best "bang for buck."
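As a quick worked example of the ROI figure (numbers are illustrative): a gain of +10 accuracy points that took 120 seconds of compilation yields 0.10 / (120 / 60) = 0.05, which the report prints as "5.00% per minute".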
Congratulations! You've completed Chapter 5: Optimizers & Compilation. You now understand how to automatically optimize DSPy programs for maximum performance!