Important: Try to solve the exercises yourself before looking at these solutions. Learning comes from struggle!
Verify Your DSPy Installation
This solution demonstrates proper DSPy setup verification with clear status messages.
"""
Exercise 1: Verify DSPy Installation - SOLUTION
"""
import os
from dotenv import load_dotenv
import dspy
def _mask_secret(secret: str, visible: int = 4) -> str:
    """Return a display-safe form of *secret*.

    Only the first and last ``visible`` characters are shown. A secret
    shorter than four times ``visible`` is hidden completely, because showing
    both ends of a short value would reveal most (or all) of it.
    """
    if len(secret) < visible * 4:
        return "****"
    return f"{secret[:visible]}...{secret[-visible:]}"


def main():
    """Verify the DSPy installation step by step, printing one status per check.

    The checks run in order: load ``.env``, report the DSPy version, confirm
    that ``OPENAI_API_KEY`` is set, configure the language model, and run a
    single test prediction. The function prints a ✗ line and returns early at
    the first failing check.
    """
    print("DSPy Installation Check")
    print("=" * 23)
    print()

    # Step 1: Load environment variables
    load_dotenv()

    # Step 2: Print DSPy version
    try:
        version = dspy.__version__
    except AttributeError:
        print("✓ DSPy imported successfully (version not available)")
    else:
        print(f"✓ DSPy version: {version}")

    # Step 3: Check for API key
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        print("✗ API key not found in environment")
        print(" Set OPENAI_API_KEY in your .env file")
        return
    # Never echo the full key; short keys are hidden entirely.
    print(f"✓ API key found: {_mask_secret(api_key)}")

    # Step 4: Configure language model
    try:
        lm = dspy.LM("openai/gpt-4o-mini", api_key=api_key)
        dspy.configure(lm=lm)
        print("✓ Language model configured")
    except Exception as e:
        print(f"✗ Failed to configure LM: {e}")
        return

    # Step 5: Run a test prediction
    question = "What is 2+2?"  # single source for both the call and the printout
    try:
        qa = dspy.Predict("question -> answer")
        result = qa(question=question)
        print("✓ Test prediction successful")
        print()
        print(f"Test question: {question}")
        print(f"Test answer: {result.answer}")
    except Exception as e:
        print(f"✗ Test prediction failed: {e}")
        return

    print()
    print("All checks passed! ✨")


if __name__ == "__main__":
    main()
Key Concepts
This solution uses dotenv for environment variable
management, handles errors explicitly at each step, and masks the API key
so that the full secret is never displayed.
Create Custom Signatures
This solution shows how to create well-documented DSPy signatures for common NLP tasks.
"""
Exercise 2: Create Custom Signatures - SOLUTION
"""
import os
from dotenv import load_dotenv
import dspy
load_dotenv()
# Translation Signature
class Translate(dspy.Signature):
    """Translate English text to Spanish."""

    # The docstring above states the task; the desc strings below describe
    # each field to the LM, so their wording is part of the behavior.

    # Input: the English source text.
    english_text: str = dspy.InputField(desc="The English text to translate")

    # Output: the Spanish rendering of english_text.
    spanish_text: str = dspy.OutputField(desc="The Spanish translation of the input text")
# Sentiment Analysis Signature
class AnalyzeSentiment(dspy.Signature):
    """Classify the sentiment of the given text."""

    # Input: the passage whose sentiment is requested.
    text: str = dspy.InputField(desc="The text to analyze for sentiment")

    # Output 1: a label; the allowed values are only suggested via desc.
    sentiment: str = dspy.OutputField(
        desc="The sentiment: 'positive', 'negative', or 'neutral'",
    )

    # Output 2: how sure the LM says it is, again as a free-form string.
    confidence: str = dspy.OutputField(
        desc="Confidence level: 'high', 'medium', or 'low'",
    )
# Summarization Signature
class Summarize(dspy.Signature):
    """Create a brief summary of the provided text."""

    # Input: the full passage to condense.
    text: str = dspy.InputField(desc="The text to summarize")

    # Output: the condensed version; the length target lives in desc.
    summary: str = dspy.OutputField(desc="A concise summary in 1-2 sentences")
# Entity Extraction Signature
class ExtractEntities(dspy.Signature):
    """Extract named entities from text."""

    # Input: the passage to scan for entities.
    text: str = dspy.InputField(desc="The text to extract entities from")

    # Outputs: one string field per entity category. Each desc asks for a
    # comma-separated list, so callers receive plain strings, not lists.
    people: str = dspy.OutputField(
        desc="Comma-separated list of person names found",
    )
    organizations: str = dspy.OutputField(
        desc="Comma-separated list of organization names found",
    )
    locations: str = dspy.OutputField(
        desc="Comma-separated list of location names found",
    )
def test_signatures():
    """Run each custom signature once on a sample input and print the result.

    Configures the global DSPy language model from ``OPENAI_API_KEY`` and then
    makes one prediction each for ``Translate``, ``AnalyzeSentiment``,
    ``Summarize`` and ``ExtractEntities``. Every sample input is held in a
    single variable so the text sent to the LM and the text echoed to the
    console cannot drift apart.
    """
    # Configure the LM
    lm = dspy.LM("openai/gpt-4o-mini", api_key=os.getenv("OPENAI_API_KEY"))
    dspy.configure(lm=lm)

    print("Testing Custom Signatures")
    print("=" * 40)
    print()

    # Test Translation
    print("1. Translation Test")
    print("-" * 20)
    english = "Hello, how are you today?"
    translator = dspy.Predict(Translate)
    result = translator(english_text=english)
    print(f" English: {english}")
    print(f" Spanish: {result.spanish_text}")
    print()

    # Test Sentiment Analysis
    print("2. Sentiment Analysis Test")
    print("-" * 20)
    review = "I absolutely love this product! Best purchase ever!"
    analyzer = dspy.Predict(AnalyzeSentiment)
    result = analyzer(text=review)
    print(f" Text: {review}")
    print(f" Sentiment: {result.sentiment}")
    print(f" Confidence: {result.confidence}")
    print()

    # Test Summarization
    print("3. Summarization Test")
    print("-" * 20)
    long_text = (
        "Machine learning is a subset of artificial intelligence that enables "
        "computers to learn from data without being explicitly programmed. "
        "It uses algorithms to identify patterns in data and make predictions "
        "or decisions based on new, unseen data."
    )
    summarizer = dspy.Predict(Summarize)
    result = summarizer(text=long_text)
    # Only a 60-character preview of the input is echoed.
    print(f" Original: {long_text[:60]}...")
    print(f" Summary: {result.summary}")
    print()

    # Test Entity Extraction
    print("4. Entity Extraction Test")
    print("-" * 20)
    news = "Elon Musk announced that Tesla will open a new factory in Berlin, Germany."
    extractor = dspy.Predict(ExtractEntities)
    result = extractor(text=news)
    print(f" Text: {news}")
    print(f" People: {result.people}")
    print(f" Organizations: {result.organizations}")
    print(f" Locations: {result.locations}")
    print()

    print("All signatures tested successfully! ✨")


if __name__ == "__main__":
    test_signatures()
Key Concepts
Each signature includes a docstring explaining its purpose and uses
dspy.InputField() and dspy.OutputField() with
descriptive desc parameters to guide the LM.
Configure Multiple Language Models
This solution demonstrates how to configure, use, and compare different language models.
"""
Exercise 3: Configure Multiple Language Models - SOLUTION
"""
import os
import time
from dotenv import load_dotenv
import dspy
load_dotenv()
class SimpleQA(dspy.Signature):
    """Answer a question clearly and concisely."""

    # Input: the question posed to the model.
    question: str = dspy.InputField(
        desc="The question to answer",
    )

    # Output: the model's reply.
    answer: str = dspy.OutputField(
        desc="A clear, concise answer",
    )
def test_model(model_name: str, question: str) -> dict:
    """Ask *question* of one model and report the answer with its latency.

    Args:
        model_name: Provider-prefixed model id, e.g. ``"openai/gpt-4o-mini"``.
        question: The question passed to the ``SimpleQA`` predictor.

    Returns:
        A dict with keys ``"model"`` (the name given), ``"time"`` (elapsed
        seconds as a float) and ``"answer"`` (the predicted answer).

    Side effects:
        Replaces the globally configured DSPy LM via ``dspy.configure``.
    """
    # Caching is switched off so a repeated run measures a real request
    # rather than a cached response.
    lm_options = {"cache": False}

    # OPENAI_API_KEY is only meaningful for OpenAI models. For any other
    # provider no key is passed here, so that provider's own credentials
    # (presumably read from its environment variable — confirm for your
    # provider) are used instead of a mismatched OpenAI key.
    if model_name.startswith("openai/"):
        lm_options["api_key"] = os.getenv("OPENAI_API_KEY")

    # Configure the model
    lm = dspy.LM(model_name, **lm_options)
    dspy.configure(lm=lm)

    # Create predictor
    qa = dspy.Predict(SimpleQA)

    # Time the prediction with a monotonic, high-resolution clock;
    # time.time() can jump if the system clock is adjusted mid-call.
    started = time.perf_counter()
    result = qa(question=question)
    elapsed_time = time.perf_counter() - started

    return {
        "model": model_name,
        "time": elapsed_time,
        "answer": result.answer,
    }
def main():
    """Ask one question of several models and compare speed and answer length.

    Each model is queried through ``test_model``. A failure for one model is
    printed and does not stop the remaining models. When at least two models
    succeed, a summary names the fastest model, the model with the longest
    answer, and prints a text bar chart of the timings.
    """
    print("Testing Multiple Language Models")
    print("=" * 35)
    print()

    question = "Explain quantum computing in simple terms"
    print(f"Question: {question}")
    print()

    # Define models to test
    # Note: Adjust based on your available API keys
    models = [
        "openai/gpt-4o-mini",  # Fast, efficient
        "openai/gpt-4o",  # More powerful
        # "anthropic/claude-3-haiku-20240307",  # Fast Claude
        # "ollama_chat/llama3.2",  # Local model (requires Ollama)
    ]

    preview_limit = 200  # maximum characters of each answer echoed to the console

    results = []
    for model in models:
        print(f"Testing: {model}")
        print("-" * 40)
        try:
            result = test_model(model, question)
            results.append(result)

            # Add the ellipsis only when the answer really was cut short.
            answer = result["answer"]
            if len(answer) > preview_limit:
                preview = answer[:preview_limit] + "..."
            else:
                preview = answer

            print(f"Time: {result['time']:.2f}s")
            print(f"Response: {preview}")
            print()
        except Exception as e:
            # Keep going: one unavailable model should not abort the comparison.
            print(f"Error: {e}")
            print()

    # Summary (a comparison needs at least two successful results)
    if len(results) > 1:
        print("Summary")
        print("-" * 40)

        # Find fastest
        fastest = min(results, key=lambda x: x["time"])
        print(f"Fastest: {fastest['model']} ({fastest['time']:.2f}s)")

        # Find longest response (proxy for "most detailed")
        most_detailed = max(results, key=lambda x: len(x["answer"]))
        print(f"Most detailed: {most_detailed['model']}")

        print()
        print("Timing Comparison:")
        for r in sorted(results, key=lambda x: x["time"]):
            # One bar segment per tenth of a second.
            bar_length = int(r["time"] * 10)
            bar = "█" * bar_length
            print(f" {r['model']:30} {r['time']:5.2f}s {bar}")


if __name__ == "__main__":
    main()
Key Concepts
This solution uses dspy.configure(lm=lm) to switch between models at
runtime, and Python's time module to measure
response latency and compare model performance.
Build a Simple Q&A System
This solution creates a context-aware Q&A system with confidence levels and citation support.
"""
Exercise 4: Build a Simple Q&A System - SOLUTION
"""
import os
from dotenv import load_dotenv
import dspy
load_dotenv()
class ContextualQA(dspy.Signature):
    """Answer a question based only on the provided context.
    If the answer is not in the context, say so clearly.
    """

    # The docstring above carries the instruction for unanswerable questions,
    # so its wording is part of the behavior and is left untouched.

    # --- Inputs ---
    context: str = dspy.InputField(
        desc="The context paragraph containing potential answers",
    )
    question: str = dspy.InputField(
        desc="The question to answer based on the context",
    )

    # --- Outputs ---
    # The answer itself, with an explicit fallback phrase named in desc.
    answer: str = dspy.OutputField(
        desc="The answer derived from the context, or 'Not found in context' if unavailable",
    )
    # Self-reported certainty, using the three levels spelled out in desc.
    confidence: str = dspy.OutputField(
        desc="Confidence level: 'high' (directly stated), 'medium' (inferred), or 'low' (uncertain)",
    )
    # The supporting excerpt from the context.
    citation: str = dspy.OutputField(
        desc="The specific phrase or sentence from the context that supports the answer",
    )
class QASystem:
    """A simple question-answering system using DSPy.

    Wraps a ``dspy.Predict(ContextualQA)`` predictor behind a single ``ask``
    method that returns plain dictionaries.
    """

    def __init__(self, model: str = "openai/gpt-4o-mini", api_key=None):
        """Configure the global DSPy LM and build the QA predictor.

        Args:
            model: Provider-prefixed model id. Defaults to the model this
                exercise originally hard-coded.
            api_key: Key to pass to the LM. When omitted and *model* is an
                ``openai/`` model, ``OPENAI_API_KEY`` is read from the
                environment, which is the original behavior.

        Side effects:
            Calls ``dspy.configure``, replacing the globally configured LM.
        """
        # Only fall back to the OpenAI key for OpenAI models, so another
        # provider is never handed a mismatched key.
        if api_key is None and model.startswith("openai/"):
            api_key = os.getenv("OPENAI_API_KEY")

        # Configure the LM
        lm = dspy.LM(model, api_key=api_key)
        dspy.configure(lm=lm)

        # Create the QA module
        self.qa = dspy.Predict(ContextualQA)

    def ask(self, context: str, question: str) -> dict:
        """Ask a question about the given context.

        Args:
            context: The text the answer must be drawn from.
            question: The question to answer.

        Returns:
            A dict with keys ``"question"`` (echoed back), ``"answer"``,
            ``"confidence"`` and ``"citation"`` (taken from the prediction).
        """
        result = self.qa(context=context, question=question)
        return {
            "question": question,
            "answer": result.answer,
            "confidence": result.confidence,
            "citation": result.citation,
        }
def main():
    """Demo driver: run three context/question pairs through ``QASystem``.

    The third pair deliberately asks about something the context does not
    mention. Each case prints its context and question, then the answer,
    confidence and citation returned by ``QASystem.ask``.
    """
    print("Context-Aware Q&A System")
    print("=" * 40)
    print()

    # Build the system once and reuse it for every case.
    qa_system = QASystem()

    # Sample cases; the last question cannot be answered from its context.
    test_cases = [
        {
            "context": "Paris is the capital of France. It has a population of about 2.1 million people.",
            "question": "What is the capital of France?",
        },
        {
            "context": "Python was created by Guido van Rossum and released in 1991.",
            "question": "Who created Python?",
        },
        {
            "context": "The Great Wall of China is over 13,000 miles long.",
            "question": "What is the main programming language used in AI?",
        },
    ]

    for case_number, case in enumerate(test_cases, start=1):
        context = case["context"]
        question = case["question"]

        print(f"Test Case {case_number}")
        print("-" * 40)
        print(f"Context: {context}")
        print(f"Question: {question}")
        print()

        outcome = qa_system.ask(context, question)
        print(f"Answer: {outcome['answer']}")
        print(f"Confidence: {outcome['confidence']}")
        print(f"Citation: {outcome['citation']}")
        print()

    print("Q&A System test complete! ✨")


if __name__ == "__main__":
    main()
Expected Output
Context-Aware Q&A System
========================================

Test Case 1
----------------------------------------
Context: Paris is the capital of France. It has a population of about 2.1 million people.
Question: What is the capital of France?

Answer: Paris
Confidence: high
Citation: Paris is the capital of France

Test Case 2
----------------------------------------
Context: Python was created by Guido van Rossum and released in 1991.
Question: Who created Python?

Answer: Guido van Rossum
Confidence: high
Citation: Python was created by Guido van Rossum

Test Case 3
----------------------------------------
Context: The Great Wall of China is over 13,000 miles long.
Question: What is the main programming language used in AI?

Answer: Not found in context
Confidence: low
Citation: N/A - The context does not contain information about programming languages

Q&A System test complete! ✨
Key Concepts
The signature's docstring instructs the LM to handle unanswerable questions gracefully. Multiple output fields provide structured responses with metadata.
Multi-Step Classification Pipeline
This advanced solution builds a complete multi-step text analysis pipeline using DSPy modules.
"""
Exercise 5: Multi-Step Classification Pipeline - SOLUTION
"""
import os
from dotenv import load_dotenv
import dspy
load_dotenv()
# Step 1: Topic Extraction
class ExtractTopic(dspy.Signature):
    """Extract the main topic from the text."""

    # Input: the passage under analysis.
    text: str = dspy.InputField(
        desc="The text to analyze",
    )

    # Output: a short topic label; the word-count target lives in desc.
    topic: str = dspy.OutputField(
        desc="The main topic in 2-5 words",
    )
# Step 2: Sentiment Classification
class ClassifySentiment(dspy.Signature):
    """Classify the overall sentiment of the text."""

    # Input: the passage under analysis.
    text: str = dspy.InputField(
        desc="The text to analyze",
    )

    # Output: a label; the three allowed values are only suggested via desc.
    sentiment: str = dspy.OutputField(
        desc="The sentiment: 'positive', 'negative', or 'neutral'",
    )
# Step 3: Audience Detection
class DetectAudience(dspy.Signature):
    """Determine the intended audience based on content and style."""

    # Inputs: the passage plus a previously extracted topic for it.
    text: str = dspy.InputField(
        desc="The text to analyze",
    )
    topic: str = dspy.InputField(
        desc="The main topic of the text",
    )

    # Output: an audience label; the three options are listed in desc.
    audience: str = dspy.OutputField(
        desc="The audience: 'general', 'technical', or 'academic'",
    )
# Step 4: Tailored Summary
class TailoredSummary(dspy.Signature):
    """Generate a summary appropriate for the target audience."""

    # Inputs: the source passage and the audience label to write for.
    text: str = dspy.InputField(
        desc="The original text",
    )
    audience: str = dspy.InputField(
        desc="The target audience type",
    )

    # Output: the summary; length and tone targets are stated in desc.
    summary: str = dspy.OutputField(
        desc="A 1-2 sentence summary written for the specified audience",
    )
class TextAnalysisPipeline(dspy.Module):
    """Chain four predictors: topic, sentiment, audience, then summary."""

    def __init__(self):
        super().__init__()
        # One predictor per stage, each bound to its own signature.
        self.extract_topic = dspy.Predict(ExtractTopic)
        self.classify_sentiment = dspy.Predict(ClassifySentiment)
        self.detect_audience = dspy.Predict(DetectAudience)
        self.generate_summary = dspy.Predict(TailoredSummary)

    def forward(self, text: str) -> dict:
        """Run *text* through all four stages in order.

        The topic from stage 1 is fed to stage 3, and the audience from
        stage 3 is fed to stage 4. Stage 2 depends only on the text.

        Returns:
            A dict with keys ``"topic"``, ``"sentiment"``, ``"audience"``
            and ``"summary"``.
        """
        # Stage 1: what is the text about?
        topic = self.extract_topic(text=text).topic

        # Stage 2: how does it feel?
        sentiment = self.classify_sentiment(text=text).sentiment

        # Stage 3: who is it written for? (needs the stage-1 topic)
        audience = self.detect_audience(text=text, topic=topic).audience

        # Stage 4: summarize for that audience (needs the stage-3 audience)
        summary = self.generate_summary(text=text, audience=audience).summary

        return {
            "topic": topic,
            "sentiment": sentiment,
            "audience": audience,
            "summary": summary,
        }
def main():
    """Demo driver: analyze three sample texts with ``TextAnalysisPipeline``.

    Configures the global DSPy LM from ``OPENAI_API_KEY``, then for each
    sample prints a preview of at most 60 characters followed by the topic,
    sentiment, audience and summary that the pipeline returns.
    """
    print("Multi-Step Text Analysis Pipeline")
    print("=" * 40)
    print()

    # Set up the language model used by every pipeline stage.
    dspy.configure(
        lm=dspy.LM(
            "openai/gpt-4o-mini",
            api_key=os.getenv("OPENAI_API_KEY"),
        )
    )

    pipeline = TextAnalysisPipeline()

    # Three samples in different registers: technical, casual review, academic.
    test_texts = [
        (
            "Machine learning models require large datasets and computational power. "
            "Recent advances in transformer architectures have revolutionized NLP tasks, "
            "enabling breakthroughs in language understanding and generation."
        ),
        (
            "I absolutely love this new restaurant! The food was amazing and the "
            "service was excellent. The pasta was cooked to perfection and the "
            "dessert was heavenly. Can't wait to go back!"
        ),
        (
            "The study examined the correlation between sleep deprivation and "
            "cognitive performance in undergraduate students. Results indicated "
            "a statistically significant decrease in working memory capacity "
            "following periods of restricted sleep (p < 0.05)."
        ),
    ]

    for index, sample in enumerate(test_texts, start=1):
        # Show only a short preview; mark it with an ellipsis when shortened.
        if len(sample) > 60:
            display_text = sample[:60] + "..."
        else:
            display_text = sample

        print(f"Processing Text {index}")
        print(f'"{display_text}"')
        print()

        # Calling the module runs the full four-stage pipeline.
        analysis = pipeline(sample)

        print("Results:")
        print("=" * 30)
        print(f"Topic: {analysis['topic']}")
        print(f"Sentiment: {analysis['sentiment']}")
        print(f"Audience: {analysis['audience']}")
        print(f"Summary (for {analysis['audience']} audience):")
        print(f" {analysis['summary']}")
        print()
        print("-" * 40)
        print()

    print("Pipeline test complete! ✨")


if __name__ == "__main__":
    main()
Expected Output
Multi-Step Text Analysis Pipeline
========================================

Processing Text 1
"Machine learning models require large datasets and computati..."

Results:
==============================
Topic: Machine Learning and NLP
Sentiment: neutral
Audience: technical
Summary (for technical audience):
 ML models require significant compute and data; transformer architectures have revolutionized NLP capabilities.

----------------------------------------

Processing Text 2
"I absolutely love this new restaurant! The food was amazing ..."

Results:
==============================
Topic: Restaurant Review
Sentiment: positive
Audience: general
Summary (for general audience):
 A glowing review of a restaurant with excellent food, service, and desserts.

----------------------------------------

Processing Text 3
"The study examined the correlation between sleep deprivation..."

Results:
==============================
Topic: Sleep and Cognition Research
Sentiment: neutral
Audience: academic
Summary (for academic audience):
 Research demonstrates a significant negative correlation between sleep restriction and working memory in undergraduates (p < 0.05).

----------------------------------------

Pipeline test complete! ✨
Key Concepts
This solution uses dspy.Module to create a reusable
pipeline. Each step passes information to subsequent steps, creating a
chain of dependent processing. The forward() method
orchestrates the multi-step flow.
Extension Ideas
Try extending this pipeline by adding error handling, caching results, or implementing parallel execution for independent steps.
📝 Solution Summary
Chapter 1 Complete!
You've reviewed all the solutions for Chapter 1. Ready to dive deeper into DSPy signatures?