Field Types & Descriptions | Chapter 2 | DSPy: The Comprehensive Guide

🎯 Why Types and Descriptions Matter

Field types and descriptions are powerful tools for shaping LM outputs:

🏷️

Type Hints

Tell DSPy what format you expect (string, list, number, boolean).

📝

Descriptions

Provide constraints and context that guide the LM's response.

✅

Validation

DSPy can validate and parse outputs based on your type hints.

📊 Supported Field Types

Basic Types

import dspy

class BasicTypes(dspy.Signature):
    """Demonstrate basic field types."""

    # One input (`text`) and four outputs, one per scalar annotation.
    # The annotation states the format expected for each field; `desc`
    # carries any constraint the annotation alone cannot express.

    # String - most common, default type
    text: str = dspy.InputField()
    summary: str = dspy.OutputField()

    # Integer
    word_count: int = dspy.OutputField(desc="Number of words")

    # Float - the annotation does not bound the value; the 0.0-1.0 range is
    # stated only in the description text.
    confidence: float = dspy.OutputField(desc="Score from 0.0 to 1.0")

    # Boolean
    is_valid: bool = dspy.OutputField(desc="True if valid, False otherwise")

Collection Types

class CollectionTypes(dspy.Signature):
    """Demonstrate collection field types."""

    # Both fields are outputs; this snippet declares no InputField.
    # The element type inside list[...] states what each item should be.

    # List of strings
    keywords: list[str] = dspy.OutputField(
        desc="List of relevant keywords"
    )

    # List of integers
    page_numbers: list[int] = dspy.OutputField(
        desc="Relevant page numbers"
    )

    # Note: Dictionaries require careful handling
    # Use typing.Dict (or the builtin dict[...] form, matching the
    # list[...] style used above) or structured output patterns

Literal Types (Constrained Values)

from typing import Literal

class ConstrainedOutput(dspy.Signature):
    """Use Literal for constrained output values."""

    # Each output is annotated with typing.Literal, so the set of allowed
    # values lives in the type itself rather than in a `desc` string.
    # None of the outputs here passes a `desc`.

    text: str = dspy.InputField()

    # Only these specific values allowed
    sentiment: Literal["positive", "negative", "neutral"] = dspy.OutputField()

    # Four ordered labels, still plain strings
    priority: Literal["low", "medium", "high", "critical"] = dspy.OutputField()

    # Literal also accepts non-string values: here the integers 1 through 5
    rating: Literal[1, 2, 3, 4, 5] = dspy.OutputField()

💡

Pro tip: Literal types are great for classification tasks where you want specific output values!

✍️ Writing Effective Descriptions

The desc parameter is your main tool for guiding outputs:

Format Specifications

class FormattedOutputs(dspy.Signature):
    """Generate formatted outputs."""

    # Every output is typed `str`; the required layout is communicated only
    # through the `desc` text, not through the annotation.

    document: str = dspy.InputField()

    # Specify format
    # Length limit expressed in sentences
    summary: str = dspy.OutputField(
        desc="2-3 sentences maximum"
    )

    # Date is kept as a string; the ISO-style pattern is given in the desc
    date: str = dspy.OutputField(
        desc="Date in YYYY-MM-DD format"
    )

    # Explicit template with X placeholders for the digits
    phone: str = dspy.OutputField(
        desc="Phone number in format: (XXX) XXX-XXXX"
    )

    # Declared as a single str (not list[str]), so the bulleted list is
    # requested as one formatted string
    bullet_points: str = dspy.OutputField(
        desc="Formatted as a bulleted list with - prefix"
    )

Value Constraints

class ConstrainedValues(dspy.Signature):
    """Outputs with value constraints."""

    # The annotations fix only the basic type (int, float, str); every range,
    # category set, and length limit below is stated in the `desc` text.

    text: str = dspy.InputField()

    # Numeric ranges
    rating: int = dspy.OutputField(
        desc="Rating from 1 to 10"
    )

    confidence: float = dspy.OutputField(
        desc="Confidence between 0.0 and 1.0"
    )

    # Categorical constraints
    # NOTE(review): the allowed values are listed only in the description;
    # a Literal[...] annotation (as in the ConstrainedOutput example of this
    # guide) would put the same set into the type - confirm which is intended.
    category: str = dspy.OutputField(
        desc="One of: 'tech', 'finance', 'health', 'sports'"
    )

    # Length constraints
    title: str = dspy.OutputField(
        desc="Short title, maximum 10 words"
    )

Contextual Instructions

class ContextualOutputs(dspy.Signature):
    """Outputs with contextual guidance."""

    # Descriptions here convey tone, audience, and intent rather than a
    # format or a value range. The input field carries a description too.

    article: str = dspy.InputField(
        desc="News article to analyze"
    )

    # Tone guidance
    summary: str = dspy.OutputField(
        desc="Professional, objective summary for business readers"
    )

    # Audience consideration
    explanation: str = dspy.OutputField(
        desc="Simple explanation suitable for a general audience"
    )

    # Action-oriented
    # Typed as list[str]: one string per action item
    action_items: list[str] = dspy.OutputField(
        desc="Specific, actionable items the reader should take"
    )

🔗 Combining Types and Descriptions

The most effective signatures combine type hints with descriptive text:

from typing import Literal

class ProductReview(dspy.Signature):
    """Analyze a product review comprehensively."""

    # One input (`review_text`) and six outputs. Each output pairs a type
    # annotation (the expected format) with a `desc` (the constraint or
    # context the annotation cannot express).

    review_text: str = dspy.InputField(
        desc="The full text of the customer review"
    )

    # Literal type + description
    # Note the third label is "mixed", not "neutral"
    sentiment: Literal["positive", "negative", "mixed"] = dspy.OutputField(
        desc="Overall sentiment of the review"
    )

    # Float type + constrained range
    # The 1.0-5.0 bound is stated in the description only
    rating_prediction: float = dspy.OutputField(
        desc="Predicted star rating from 1.0 to 5.0"
    )

    # List type + specific guidance
    # The item count (3-5) is requested via the description
    pros: list[str] = dspy.OutputField(
        desc="Positive aspects mentioned, 3-5 items"
    )

    cons: list[str] = dspy.OutputField(
        desc="Negative aspects mentioned, 3-5 items"
    )

    # Boolean + clear criteria
    would_recommend: bool = dspy.OutputField(
        desc="True if reviewer would recommend the product"
    )

    # String + format specification
    one_liner: str = dspy.OutputField(
        desc="One sentence summary, max 15 words"
    )

Example Usage

import dspy

# Configure LM
# Build the language-model handle and register it through dspy.configure.
lm = dspy.LM("openai/gpt-4o-mini")
dspy.configure(lm=lm)

# Create predictor
# ProductReview is the signature class defined in the preceding snippet.
reviewer = dspy.Predict(ProductReview)

# Analyze a review
# The keyword argument name matches the signature's only input field.
# The triple-quoted literal keeps its leading newline and per-line indentation.
result = reviewer(
    review_text="""
    I've been using this laptop for 3 months now. The battery life 
    is amazing - easily lasts 12 hours! The keyboard is comfortable 
    for long typing sessions. However, the trackpad is a bit small 
    and the speakers are mediocre. For the price point though, it's 
    a solid choice for students or light work.
    """
)

# Each output field of the signature is read as an attribute of `result`.
# The trailing comments are illustrative values; actual output depends on the LM.
print(f"Sentiment: {result.sentiment}")           # positive
print(f"Predicted Rating: {result.rating_prediction}")  # 4.0
print(f"Pros: {result.pros}")                     # ['Amazing battery life', ...]
print(f"Cons: {result.cons}")                     # ['Small trackpad', ...]
print(f"Would Recommend: {result.would_recommend}")  # True
print(f"Summary: {result.one_liner}")

📥 Input Field Descriptions

Descriptions on input fields help the LM understand the context:

class TranslationTask(dspy.Signature):
    """Translate text while preserving meaning and tone."""

    # Four described inputs and one output. All fields are plain `str`;
    # the descriptions supply the context, including inline examples of
    # acceptable values.

    # Descriptive input fields
    source_text: str = dspy.InputField(
        desc="The original text to translate"
    )

    # Languages are given by name; the desc shows an example value
    source_language: str = dspy.InputField(
        desc="The language of the source text (e.g., 'English')"
    )

    target_language: str = dspy.InputField(
        desc="The language to translate into (e.g., 'Spanish')"
    )

    # Typed as str: the three accepted values are listed in the description
    # only, so nothing in the annotation restricts what a caller passes
    formality: str = dspy.InputField(
        desc="Desired formality: 'formal', 'casual', or 'neutral'"
    )

    # Output
    # The output description refers back to the `formality` input
    translation: str = dspy.OutputField(
        desc="Translated text matching the specified formality"
    )

🚀 Advanced Patterns

Optional Output Fields

from typing import Optional

class FlexibleAnalysis(dspy.Signature):
    """Analyze with optional detailed output."""

    # `include_details` is an ordinary boolean input. Its link to
    # `detailed_analysis` exists only in the description text below.

    text: str = dspy.InputField()
    include_details: bool = dspy.InputField(
        desc="Whether to include detailed analysis"
    )

    summary: str = dspy.OutputField()

    # Optional output - may be empty if not requested
    # Optional[str] permits None; no default value is declared on the field.
    detailed_analysis: Optional[str] = dspy.OutputField(
        desc="Detailed analysis if include_details is True"
    )

Structured JSON-like Outputs

class EntityExtraction(dspy.Signature):
    """Extract named entities from text."""

    # One list[str] output per entity category, so each category is
    # returned as its own list of strings.

    text: str = dspy.InputField()

    # Use list with structured descriptions
    people: list[str] = dspy.OutputField(
        desc="Names of people mentioned"
    )

    organizations: list[str] = dspy.OutputField(
        desc="Organization and company names"
    )

    locations: list[str] = dspy.OutputField(
        desc="Geographic locations mentioned"
    )

    # Dates stay strings; the normalized pattern is requested in the desc
    dates: list[str] = dspy.OutputField(
        desc="Dates in standardized YYYY-MM-DD format"
    )

Multi-Step Reasoning Outputs

class ReasonedClassification(dspy.Signature):
    """Classify with explicit reasoning steps."""

    # Outputs are declared in the order observations -> reasoning ->
    # classification -> confidence.
    # NOTE(review): presumably the declaration order is the order in which the
    # LM is asked to produce the fields - confirm against the DSPy docs.

    content: str = dspy.InputField(
        desc="Content to classify"
    )

    # Force explicit reasoning
    observations: list[str] = dspy.OutputField(
        desc="Key observations about the content"
    )

    reasoning: str = dspy.OutputField(
        desc="Step-by-step reasoning for classification"
    )

    # Free-form str: this signature does not enumerate the allowed labels
    classification: str = dspy.OutputField(
        desc="Final classification based on reasoning"
    )

    # The 0.0-1.0 range is stated in the description only
    confidence: float = dspy.OutputField(
        desc="Confidence in classification (0.0 to 1.0)"
    )

⚠️ Common Mistakes to Avoid

❌

Vague Descriptions

# Fragment: elsewhere in this guide such field declarations sit inside a
# dspy.Signature class body.

# Bad - too vague
# "The result" states no length, format, or content constraint.
output: str = dspy.OutputField(desc="The result")

# Good - specific
# Names both the length (2-3 sentences) and the content (main points).
summary: str = dspy.OutputField(
    desc="2-3 sentence summary capturing main points"
)

❌

Conflicting Instructions

# Fragment: the two `summary` declarations are alternatives, not meant to
# coexist in one signature.

# Bad - contradictory
# "Detailed comprehensive" and "1 sentence" pull in opposite directions.
summary: str = dspy.OutputField(
    desc="Detailed comprehensive summary in 1 sentence"
)

# Good - consistent
# Length and scope agree: one sentence, one key point.
summary: str = dspy.OutputField(
    desc="Concise 1-sentence summary of key point"
)

❌

Missing Type Hints

# Fragment: the two `keywords` declarations are alternatives, not meant to
# coexist in one signature.

# Bad - no type hint
# The description asks for a list, but no annotation says so.
# NOTE(review): presumably the field falls back to the default type (str,
# per the Basic Types section of this guide) - confirm.
keywords = dspy.OutputField(desc="List of keywords")

# Good - typed
# list[str] states the format; the description adds the expected item count.
keywords: list[str] = dspy.OutputField(
    desc="List of 5-10 relevant keywords"
)

📝 Key Takeaways

Type hints tell DSPy the expected format (str, int, float, bool, list)

Descriptions provide constraints, format specs, and context

Literal types constrain outputs to specific values

Combine type hints and descriptions for maximum control over outputs

Be specific and consistent in your descriptions

Continue to Exercises