The Approximation Problem¶
Suppose $\vec{x}$ and $\vec{y}$ are any two vectors in $\mathbb{R}^n$. (When I say this, I just mean that they both have the same number of components.)
Let’s think about all possible vectors of the form $w\vec{x}$, where $w$ can be any scalar. Any vector of the form $w\vec{x}$ is a scalar multiple of $\vec{x}$, and points in the same direction as $\vec{x}$ (if $w > 0$) or the opposite direction (if $w < 0$). What is different about these scalar multiples is how long they are.
To get a sense of what I mean by this, play with the slider for $w$ below. There are three vectors being visualized: some $\vec{x}$, some $\vec{y}$, and $w\vec{x}$, which depends on the value of $w$ you choose.
Notice that the set of vectors of the form $w\vec{x}$ fills out a line. So really, what we’re asking is which vector on this line is closest to $\vec{y}$.
In terms of angles, if $\theta$ is the angle between $\vec{x}$ and $\vec{y}$, then the angle between $w\vec{x}$ and $\vec{y}$ is either $\theta$ (if $w > 0$) or $180^\circ - \theta$ (if $w < 0$). So changing $w$ doesn’t change how “similar” $w\vec{x}$ and $\vec{y}$ are in the cosine similarity sense, other than possibly flipping the sign of the cosine.
But, some choices of $w$ will make $w\vec{x}$ closer to $\vec{y}$ than others. I call this the approximation problem: how well can we recreate, or approximate, $\vec{y}$ using a scalar multiple of $\vec{x}$? It turns out that linear regression is intimately related to this problem. Previously, we were trying to approximate commute times as best as we could using a linear function of departure times.
Let’s be more precise about what we mean by “closer”. For any value of $w$, we can measure the error of our approximation by the length of the error vector, $\vec{e} = \vec{y} - w\vec{x}$.
Continue to play with the slider above for $w$. How do you get the length of the error vector to be as small as possible?
Intuitively, it seems that to get the error vector to be as short as possible, we should make it orthogonal to $\vec{x}$. Since we can control $w$, we can control $\vec{e} = \vec{y} - w\vec{x}$, so we can make the error vector orthogonal to $\vec{x}$ by choosing the right $w$.
Orthogonal Projections¶
Our goal is to minimize the length of the error vector, $\vec{e} = \vec{y} - w\vec{x}$.
This is the same as minimizing

$$\lVert \vec{e} \rVert = \lVert \vec{y} - w\vec{x} \rVert$$

One way to approach this problem is to treat the above expression as a function of $w$ and find the value of $w$ that minimizes it through calculus. You’ll do this in Homework 3. I’ll show you a more geometric approach here.
We’ve guessed, but not yet shown, that the shortest possible error vector is the one that is orthogonal to $\vec{x}$. Let $w$ be the value of the scalar that makes the error vector $\vec{e} = \vec{y} - w\vec{x}$ orthogonal to $\vec{x}$. Here, we’ll prove that $w$ is the “best” choice of scalar by showing that any other choice of the scalar will result in an error vector that is longer than $\vec{e}$. Think of this as a proof by contradiction (if you’re familiar with that idea; no worries if not).
For comparison, let $w'$ be some other value of the scalar, with $w' \neq w$, and let its error vector be $\vec{e}\,' = \vec{y} - w'\vec{x}$.
I’ve drawn $w'\vec{x}$ in gray. Arbitrarily, I’ve shown it as being shorter than $w\vec{x}$, but I could have drawn it as being longer and the argument would be the same. The prime has nothing to do with derivatives, by the way – it’s just a new variable.
The vectors $w\vec{x}$ and $w'\vec{x}$, along with their corresponding error vectors, create a right-angled triangle, shaded in gold above. This triangle has a hypotenuse of $\vec{e}\,'$ and legs of $\vec{e}$ and $(w - w')\vec{x}$.
Applying the Pythagorean theorem to this triangle gives us

$$\lVert \vec{e}\,' \rVert^2 = \lVert \vec{e} \rVert^2 + \lVert (w - w')\vec{x} \rVert^2 \geq \lVert \vec{e} \rVert^2$$

with equality only when we choose $w' = w$, but we’ve assumed that $w' \neq w$.
This implies that

$$\lVert \vec{e}\,' \rVert^2 > \lVert \vec{e} \rVert^2$$

which means that $\lVert \vec{e}\,' \rVert > \lVert \vec{e} \rVert$.
In other words, if $w' \neq w$, then $\lVert \vec{e}\,' \rVert > \lVert \vec{e} \rVert$. Thus, the error vector $\vec{e}$ is shorter than any other error vector $\vec{e}\,'$, and the best choice of scalar is the $w$ that makes the error vector orthogonal to $\vec{x}$!
What was the point of all this again?
We know that the answer is $w^*\vec{x}$, where $w^*$ is the value of $w$ that makes the error vector orthogonal to $\vec{x}$. (I’ve switched from calling this optimal scalar $w$ to $w^*$; $w$ was a name I used in the proof above, but more generally, “optimal” values are starred for our purposes).
Let’s now find the value of $w^*$, in terms of just $\vec{x}$ and $\vec{y}$. If $\vec{e} = \vec{y} - w^*\vec{x}$ is orthogonal to $\vec{x}$, then

$$\vec{x} \cdot (\vec{y} - w^*\vec{x}) = 0 \implies \vec{x} \cdot \vec{y} - w^*(\vec{x} \cdot \vec{x}) = 0 \implies \boxed{w^* = \frac{\vec{x} \cdot \vec{y}}{\vec{x} \cdot \vec{x}}}$$

The boxed value above is a scalar. It tells us the optimal amount to multiply $\vec{x}$ by to get the best approximation of $\vec{y}$. Once we multiply that boxed scalar, $w^*$, by the vector $\vec{x}$, we get what’s called the orthogonal projection of $\vec{y}$ onto $\vec{x}$:

$$w^*\vec{x} = \left( \frac{\vec{x} \cdot \vec{y}}{\vec{x} \cdot \vec{x}} \right) \vec{x}$$

Among all vectors of the form $w\vec{x}$, the vector $w^*\vec{x}$ above is the one that is closest to $\vec{y}$.
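If you’d like a numerical sanity check of this claim, here’s a minimal sketch in Python using numpy. The specific vectors (and the helper function) are made up for illustration – any two vectors with the same number of components would work.

```python
import numpy as np

# Hypothetical vectors, chosen only for illustration.
x = np.array([3.0, 1.0])
y = np.array([2.0, 4.0])

w_star = np.dot(x, y) / np.dot(x, x)   # the boxed optimal scalar

def error_length(w):
    """Length of the error vector y - w x."""
    return np.linalg.norm(y - w * x)

# Any other choice of w gives a longer error vector than w_star does.
for w in [w_star - 1, w_star - 0.1, w_star + 0.1, w_star + 1]:
    print(error_length(w) > error_length(w_star))   # True every time
```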
Why “orthogonal projection”? “Orthogonal” comes from the fact that $w^*\vec{x}$’s error vector is orthogonal to $\vec{x}$. “Projection” comes from the intuition you should have that $w^*\vec{x}$ is the shadow of $\vec{y}$ onto $\vec{x}$.
We’ve defined the error vector as $\vec{e} = \vec{y} - w^*\vec{x}$, and we know that $\vec{e}$ is orthogonal to $\vec{x}$. Rearranging the definition of the error vector gives us

$$\vec{y} = w^*\vec{x} + \vec{e}$$

All this says is that $\vec{y}$ is the sum of:
$w^*\vec{x}$, which is parallel to $\vec{x}$ (by definition of orthogonal projection)
$\vec{e}$, which is orthogonal to $\vec{x}$ (by definition of error vector)
Sometimes, we call this the orthogonal decomposition of $\vec{y}$ with respect to $\vec{x}$. I’ll speak more about decompositions later in this section.
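Here’s another short sketch, again in Python with numpy and with made-up vectors, that computes the orthogonal projection and verifies this decomposition.

```python
import numpy as np

def project(y, x):
    """Orthogonal projection of y onto x, i.e. the vector w* x."""
    w_star = np.dot(x, y) / np.dot(x, x)
    return w_star * x

# Hypothetical vectors, for illustration only.
x = np.array([3.0, 1.0])
y = np.array([2.0, 4.0])

p = project(y, x)             # the orthogonal projection of y onto x
e = y - p                     # the error vector

print(np.dot(e, x))           # ~0: e is orthogonal to x
print(np.allclose(p + e, y))  # True: y = (parallel part) + (orthogonal part)
```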
Examples¶
Let’s make things concrete by working through several examples. Each one was carefully chosen to illustrate something in particular.
We will work through several of these examples in lecture; attempt the ones that we don’t on your own.
Example: Fundamentals¶
Let $\vec{u}$ and $\vec{v}$ be two vectors with the same number of components.
Find the orthogonal projection of $\vec{v}$ onto $\vec{u}$.
Find the error vector, i.e. the vector $\vec{e} = \vec{v} - w^*\vec{u}$, and verify that it is orthogonal to $\vec{u}$.
What is the length of the error vector (i.e. the projection error)?
Solution
Part 1
The orthogonal projection of $\vec{v}$ onto $\vec{u}$ is given by

$$\left( \frac{\vec{u} \cdot \vec{v}}{\vec{u} \cdot \vec{u}} \right) \vec{u}$$

Computing the two dot products, $\vec{u} \cdot \vec{v}$ and $\vec{u} \cdot \vec{u}$, and substituting them into this formula gives the orthogonal projection of $\vec{v}$ onto $\vec{u}$, which is a scalar multiple of $\vec{u}$.
Part 2
The error vector is $\vec{e} = \vec{v} - w^*\vec{u}$, where $w^*\vec{u}$ is the projection we found in Part 1.
To check whether it’s orthogonal to $\vec{u}$, we compute the dot product $\vec{u} \cdot \vec{e}$; we’re hoping it’s 0.
So, the error vector is orthogonal to $\vec{u}$.
Part 3
The length of the error vector is $\lVert \vec{e} \rVert = \lVert \vec{v} - w^*\vec{u} \rVert$.
We might say $\lVert \vec{e} \rVert$ is the projection error. Another way of thinking of it is as the shortest distance from the point $\vec{v}$ to the line that passes through $\vec{u}$ and the origin, $\vec{0}$.
Example: The Line Perspective¶
Consider the points $\vec{u}$ and $\vec{v}$ from the previous example.
What is the shortest distance between $\vec{v}$ and the line that passes through $\vec{u}$ and the origin, $\vec{0}$?
Solution
The answer is the projection error we found in Part 3 of the previous example, $\lVert \vec{e} \rVert$. This example didn’t require any additional math beyond the previous example; it just serves to remind you of the geometry of the situation. The set of all possible scalar multiples of $\vec{u}$ fills out a line, and that line passes through $\vec{u}$ and $\vec{0}$.
Why does that line pass through $\vec{0}$? Consider the scalar multiple $0\vec{u}$ – it’s the zero vector!
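As a quick numerical illustration – a sketch with made-up points, not the ones from these examples – the distance from a point to the line through another point and the origin is exactly the length of the projection error.

```python
import numpy as np

def distance_to_line(v, u):
    """Shortest distance from the point v to the line through u and the origin."""
    w_star = np.dot(u, v) / np.dot(u, u)
    e = v - w_star * u          # error vector of projecting v onto u
    return np.linalg.norm(e)    # its length is the point-to-line distance

# Hypothetical points, for illustration only.
u = np.array([3.0, 1.0])
v = np.array([2.0, 4.0])
print(distance_to_line(v, u))
```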
Example: Which Order?¶
Let $\vec{u}$ and $\vec{v}$ be the same vectors as in the first example.
In the first example, we found the orthogonal projection of $\vec{v}$ onto $\vec{u}$.
Now, do the opposite: find the orthogonal projection of $\vec{u}$ onto $\vec{v}$.
Solution
Now, we’re projecting onto $\vec{v}$, which means our answer is going to be a multiple of $\vec{v}$, not of $\vec{u}$ as in the first example.
The orthogonal projection of $\vec{u}$ onto $\vec{v}$ is given by

$$\left( \frac{\vec{u} \cdot \vec{v}}{\vec{v} \cdot \vec{v}} \right) \vec{v}$$

The formula for the scalar in front of $\vec{v}$ is the same as in Part 1 of the first example, but with all $\vec{u}$’s replaced by $\vec{v}$’s and vice versa. The numerator is the same, since $\vec{u} \cdot \vec{v} = \vec{v} \cdot \vec{u}$. The denominator is different; just remember that the denominator is the squared norm of the vector you’re projecting onto.
So, the orthogonal projection of $\vec{u}$ onto $\vec{v}$ is a scalar multiple of $\vec{v}$, and it is generally a different vector than the projection of $\vec{v}$ onto $\vec{u}$.
Note that the corresponding error vector, $\vec{u} - \left( \frac{\vec{u} \cdot \vec{v}}{\vec{v} \cdot \vec{v}} \right) \vec{v}$, is orthogonal to $\vec{v}$ (not $\vec{u}$), since $\vec{v}$ is the vector we projected onto.
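Here’s a small numerical check, with made-up vectors, that the two orders of projection generally give different answers.

```python
import numpy as np

# Hypothetical vectors, for illustration only.
u = np.array([3.0, 1.0])
v = np.array([2.0, 4.0])

proj_v_onto_u = (np.dot(u, v) / np.dot(u, u)) * u   # a multiple of u
proj_u_onto_v = (np.dot(u, v) / np.dot(v, v)) * v   # a multiple of v

print(proj_v_onto_u)   # lies along u
print(proj_u_onto_v)   # lies along v; a different vector in general
```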
Example: Unit Vectors¶
Let $\vec{u}$ and $\vec{v}$ be two vectors, and let $\hat{u} = \frac{\vec{u}}{\lVert \vec{u} \rVert}$ be the unit vector in the direction of $\vec{u}$.
Find the orthogonal projection of $\vec{v}$ onto $\vec{u}$.
Find the orthogonal projection of $\vec{v}$ onto $\hat{u}$.
What do you notice about your answers to the above two parts?
Solution
Part 1
We know that the orthogonal projection of $\vec{v}$ onto $\vec{u}$ is given by

$$\left( \frac{\vec{u} \cdot \vec{v}}{\vec{u} \cdot \vec{u}} \right) \vec{u}$$

Let’s compute the relevant dot products, $\vec{u} \cdot \vec{v}$ and $\vec{u} \cdot \vec{u}$.
Substituting them into the formula above gives the orthogonal projection of $\vec{v}$ onto $\vec{u}$.
Part 2
Now, we need to find the orthogonal projection of $\vec{v}$ onto $\hat{u}$, which is $\left( \frac{\hat{u} \cdot \vec{v}}{\hat{u} \cdot \hat{u}} \right) \hat{u}$.
Let’s compute the relevant dot products, $\hat{u} \cdot \vec{v}$ and $\hat{u} \cdot \hat{u}$.
Substituting them into the formula gives the orthogonal projection of $\vec{v}$ onto $\hat{u}$.
Part 3
Notice that in both parts, the orthogonal projection is the same vector! This is not a coincidence. Both $\vec{u}$ and $\hat{u}$ point in the same direction, meaning the set of possible vectors of the form $w\vec{u}$ is the same as the set of possible vectors of the form $w\hat{u}$. Another way to think about this is that they both span the same line through the origin.
The difference between $\vec{u}$ and $\hat{u}$ is that $\hat{u}$ is a unit vector in the direction of $\vec{u}$, meaning that it points in the same direction as $\vec{u}$ but has length $1$ rather than $\lVert \vec{u} \rVert$.
What’s different is the scalar we need to multiply each vector by to get the orthogonal projection. In the case of the unit vector $\hat{u}$, the number in front of $\hat{u}$ is $\frac{\hat{u} \cdot \vec{v}}{\hat{u} \cdot \hat{u}}$, but since $\hat{u} \cdot \hat{u} = 1$, this simplifies to $\hat{u} \cdot \vec{v}$.
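To see this numerically, here’s a sketch with made-up vectors: projecting onto $\vec{u}$ and onto the unit vector $\hat{u}$ gives the same vector, and the scalar in front of $\hat{u}$ is just a dot product.

```python
import numpy as np

# Hypothetical vectors, for illustration only.
u = np.array([3.0, 4.0])
v = np.array([2.0, 5.0])
u_hat = u / np.linalg.norm(u)                # unit vector in the direction of u

proj_onto_u = (np.dot(u, v) / np.dot(u, u)) * u
proj_onto_u_hat = np.dot(u_hat, v) * u_hat   # denominator u_hat . u_hat is 1

print(np.allclose(proj_onto_u, proj_onto_u_hat))  # True: same projection
print(np.dot(u_hat, v))                           # the scalar in front of u_hat
```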
Example: Unit Vectors, Continued¶
Suppose $\vec{u}$ and $\vec{v}$ are any two vectors. Let $\theta$ be the angle between $\vec{u}$ and $\vec{v}$.
Show that the orthogonal projection of $\vec{v}$ onto $\vec{u}$ is equal to

$$\left( \lVert \vec{v} \rVert \cos \theta \right) \frac{\vec{u}}{\lVert \vec{u} \rVert}$$

This is not a formula we’d use to actually compute the projection, since finding $\theta$ is harder than using the dot product-based formula from above. But, what does this formula tell us about the relationship between $\vec{v}$ and its orthogonal projection onto $\vec{u}$?
Solution
Let’s start with the original formula for the orthogonal projection of $\vec{v}$ onto $\vec{u}$:

$$\left( \frac{\vec{u} \cdot \vec{v}}{\vec{u} \cdot \vec{u}} \right) \vec{u}$$

Using the facts that $\vec{u} \cdot \vec{v} = \lVert \vec{u} \rVert \lVert \vec{v} \rVert \cos \theta$ and $\vec{u} \cdot \vec{u} = \lVert \vec{u} \rVert^2$, we can rewrite the formula as

$$\frac{\lVert \vec{u} \rVert \lVert \vec{v} \rVert \cos \theta}{\lVert \vec{u} \rVert^2} \vec{u} = \left( \lVert \vec{v} \rVert \cos \theta \right) \frac{\vec{u}}{\lVert \vec{u} \rVert}$$

The parentheses around $\lVert \vec{v} \rVert \cos \theta$ don’t change the calculation, but they help with the interpretation. This shows us that we can think of the orthogonal projection of $\vec{v}$ onto $\vec{u}$ as a vector with:
a (signed) length of $\lVert \vec{v} \rVert \cos \theta$
in the direction of $\frac{\vec{u}}{\lVert \vec{u} \rVert}$, which is a unit vector in the direction of $\vec{u}$
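Here’s a numerical check of this rewriting – a sketch with made-up vectors – computing the cosine of the angle explicitly and comparing the two forms of the projection.

```python
import numpy as np

# Hypothetical vectors, for illustration only.
u = np.array([3.0, 1.0])
v = np.array([2.0, 4.0])

# Dot product form of the projection.
proj_dot = (np.dot(u, v) / np.dot(u, u)) * u

# Angle-based form: (||v|| cos(theta)) times the unit vector along u.
cos_theta = np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))
proj_angle = (np.linalg.norm(v) * cos_theta) * (u / np.linalg.norm(u))

print(np.allclose(proj_dot, proj_angle))   # True: the two forms agree
```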
Example: Projecting onto an Orthogonal Vector¶
Let $\vec{u}$ and $\vec{v}$ be two vectors in $\mathbb{R}^2$.
$\vec{u}$ and $\vec{v}$ are orthogonal, meaning $\vec{u} \cdot \vec{v} = 0$. What does this say about the orthogonal projection of $\vec{v}$ onto $\vec{u}$?
Solution
Since $\vec{u} \cdot \vec{v} = 0$, the optimal scalar is $w^* = \frac{\vec{u} \cdot \vec{v}}{\vec{u} \cdot \vec{u}} = 0$, so the orthogonal projection of $\vec{v}$ onto $\vec{u}$ is the zero vector, $\vec{0}$.
Intuitively, $\vec{u}$ and $\vec{v}$ travel in totally different directions. Travelling any amount in the direction of $\vec{u}$ will only take you further away from $\vec{v}$. So, it’s best to stick with the zero vector, $\vec{0}$.
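Numerically, with a made-up orthogonal pair of vectors:

```python
import numpy as np

# Hypothetical orthogonal vectors, for illustration only.
u = np.array([1.0, 2.0])
v = np.array([-2.0, 1.0])              # u . v = 0

w_star = np.dot(u, v) / np.dot(u, u)   # numerator is 0, so w* = 0
print(w_star * u)                      # [0. 0.]: the projection is the zero vector
```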

Notice that $\vec{u}$ and $\vec{v}$ are orthogonal – that’s important to what we’re about to discover.
Now, take some other vector $\vec{c}$ in $\mathbb{R}^2$. Let’s find the orthogonal projection of $\vec{c}$ onto $\vec{u}$ (called $\vec{p}_u$) and the orthogonal projection of $\vec{c}$ onto $\vec{v}$ (called $\vec{p}_v$).
Notice that $\vec{c}$ is the sum of $\vec{p}_u$ and $\vec{p}_v$!

Why is the sum of $\vec{p}_u$ and $\vec{p}_v$ equal to $\vec{c}$? Earlier, I mentioned that we can use orthogonal projections to decompose vectors. Here, when we project $\vec{c}$ onto $\vec{u}$, the corresponding error vector $\vec{e} = \vec{c} - \vec{p}_u$ is orthogonal to $\vec{u}$.
By projecting $\vec{c}$ onto $\vec{v}$ (which is parallel to that error vector, since both are orthogonal to $\vec{u}$ and we’re in $\mathbb{R}^2$), we can recreate the error vector exactly, meaning $\vec{p}_v = \vec{e}$ and so $\vec{c} = \vec{p}_u + \vec{p}_v$.
Taking a step back, the fact that $\vec{u}$ and $\vec{v}$ are orthogonal meant that writing $\vec{c}$ as a linear combination of $\vec{u}$ and $\vec{v}$ was easy.
If $\vec{u}$ and $\vec{v}$ were not orthogonal, then writing $\vec{c}$ as a linear combination of $\vec{u}$ and $\vec{v}$ would have involved solving a system of 2 equations and 2 unknowns, as we’ve had to do in previous sections.
For instance, if we keep $\vec{c}$ but instead look at two vectors $\vec{s}$ and $\vec{t}$ that are not orthogonal to each other, we have that
the projection of $\vec{c}$ onto $\vec{s}$ is some scalar multiple of $\vec{s}$,
the projection of $\vec{c}$ onto $\vec{t}$ is some scalar multiple of $\vec{t}$,
but the sum of those two projections is not equal to $\vec{c}$, as the sketch below illustrates.
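Here’s that contrast in Python with numpy; the vector being decomposed, the orthogonal pair, and the non-orthogonal pair are all made up for illustration.

```python
import numpy as np

def project(c, d):
    """Orthogonal projection of c onto d."""
    return (np.dot(d, c) / np.dot(d, d)) * d

c = np.array([5.0, 2.0])          # hypothetical vector to decompose

# Orthogonal pair: the two projections sum back to c.
u = np.array([1.0, 1.0])
v = np.array([1.0, -1.0])         # u . v = 0
print(np.allclose(project(c, u) + project(c, v), c))   # True

# Non-orthogonal pair: the two projections do NOT sum back to c.
s = np.array([1.0, 0.0])
t = np.array([1.0, 1.0])          # s . t != 0
print(np.allclose(project(c, s) + project(c, t), c))   # False
```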
I’d like to provide a more general “theorem” on when you can use orthogonal projections to more easily write a vector as a linear combination of vectors $\vec{v}_1$, $\vec{v}_2$, ..., $\vec{v}_d$, but we’ll need to first study the idea of a basis. That’s to come.
This section has been light on activities, since it provided many examples that we’ll work through in lecture. But, here’s one to tie this last point together.
What’s Next?¶
The motivating problem for this section was the approximation problem, which asked us to find the best approximation of a vector $\vec{y}$ using only a scalar multiple of a vector $\vec{x}$.
The next natural step is to consider the case where we want to approximate $\vec{y}$ using a linear combination of more than one vector, $\vec{x}_1$, $\vec{x}_2$, ..., $\vec{x}_d$. Why? Remember, this all connects back to the problem of linear regression. The more vectors we have as “building blocks” in our linear combination, the more features our model will be able to use. (I haven’t made the connection from linear algebra to linear regression yet, but just know this is why we’re studying projections.)
For example, let’s consider two vectors $\vec{x}_1$ and $\vec{x}_2$, along with a vector $\vec{y}$ we’d like to approximate. Among all linear combinations of $\vec{x}_1$ and $\vec{x}_2$, which one is closest to $\vec{y}$?
To answer this question, we’d need to find the scalars $w_1$ and $w_2$ such that the error vector

$$\vec{e} = \vec{y} - (w_1 \vec{x}_1 + w_2 \vec{x}_2)$$

has minimal length, which presumably happens when $\vec{e}$ is orthogonal to both $\vec{x}_1$ and $\vec{x}_2$.
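To get a feel for where this is headed, here’s a minimal sketch with made-up vectors that finds $w_1$ and $w_2$ by imposing exactly those two orthogonality conditions, which amounts to solving a system of 2 equations in 2 unknowns.

```python
import numpy as np

# Hypothetical vectors in R^3, for illustration only.
x1 = np.array([1.0, 0.0, 1.0])
x2 = np.array([0.0, 1.0, 1.0])
y = np.array([2.0, 3.0, 4.0])

# Requiring e = y - (w1 x1 + w2 x2) to be orthogonal to x1 and to x2
# gives two equations in the two unknowns w1 and w2.
A = np.array([[np.dot(x1, x1), np.dot(x1, x2)],
              [np.dot(x2, x1), np.dot(x2, x2)]])
b = np.array([np.dot(x1, y), np.dot(x2, y)])
w1, w2 = np.linalg.solve(A, b)

e = y - (w1 * x1 + w2 * x2)
print(w1, w2)
print(np.dot(e, x1), np.dot(e, x2))   # both ~0: e is orthogonal to x1 and x2
```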
Travelling down this road, we might be able to find the values of $w_1$ and $w_2$ that minimize the length of $\vec{e}$. But then we’ll want to ask how we can do this for any $\vec{y}$ and any set of vectors $\vec{x}_1$, $\vec{x}_2$, ..., $\vec{x}_d$, and it seems like we’ll need a more general solution. In general, to find the “best” approximation of $\vec{y}$ using a linear combination of $\vec{x}_1$, $\vec{x}_2$, ..., $\vec{x}_d$, we’ll need to know about matrices. We’ll introduce matrices in Chapter 2.5.
For now, in Chapter 2.4, we will set aside the goal of projections temporarily, and instead focus on truly understanding the set of possible linear combinations of a given set of vectors. For example, the vectors $\vec{x}_1$ and $\vec{x}_2$ from earlier define a plane. So, asking which linear combination of $\vec{x}_1$ and $\vec{x}_2$ is closest to $\vec{y}$ is equivalent to asking which point on that plane is closest to $\vec{y}$.
Chapter 2.4 will answer the questions, “why do $\vec{x}_1$ and $\vec{x}_2$ define a plane?”, “which plane do they define?”, and “in general, what do $\vec{x}_1$, $\vec{x}_2$, ..., $\vec{x}_d$, all in $\mathbb{R}^n$, define?”