Close Menu
    Main Menu
    • Home
    • News
    • Tech
    • Robotics
    • ML & Research
    • AI
    • Digital Transformation
    • AI Ethics & Regulation
    • Thought Leadership in AI

    Subscribe to Updates

    Get the latest creative news from FooBar about art, design and business.

    What's Hot

    AI use is changing how much companies pay for cyber insurance

    March 12, 2026

    AI-Powered Cybercrime Is Surging. The US Lost $16.6 Billion in 2024.

    March 12, 2026

    Setting Up a Google Colab AI-Assisted Coding Environment That Actually Works

    March 12, 2026
    Facebook X (Twitter) Instagram
    UK Tech InsiderUK Tech Insider
    Facebook X (Twitter) Instagram
    UK Tech InsiderUK Tech Insider
    Home»Machine Learning & Research»A Gentle Introduction to Language Model Fine-tuning
    Machine Learning & Research

    A Gentle Introduction to Language Model Fine-tuning

    Oliver ChambersBy Oliver ChambersJanuary 11, 2026No Comments4 Mins Read
    Facebook Twitter Pinterest Telegram LinkedIn Tumblr Email Reddit
    A Gentle Introduction to Language Model Fine-tuning
    Share
    Facebook Twitter LinkedIn Pinterest Email Copy Link


    import dataclasses

     

    import tokenizers

    import torch

    import torch.nn as nn

    import torch.nn.practical as F

    from torch import Tensor

     

     

    # Mannequin structure similar as coaching script

    @dataclasses.dataclass

    class LlamaConfig:

        “”“Outline Llama mannequin hyperparameters.”“”

        vocab_size: int = 50000

        max_position_embeddings: int = 2048

        hidden_size: int = 768

        intermediate_size: int = 4*768

        num_hidden_layers: int = 12

        num_attention_heads: int = 12

        num_key_value_heads: int = 3

     

    class RotaryPositionEncoding(nn.Module):

        “”“Rotary place encoding.”“”

     

        def __init__(self, dim: int, max_position_embeddings: int) -> None:

            tremendous().__init__()

            self.dim = dim

            self.max_position_embeddings = max_position_embeddings

            N = 10_000.0

            inv_freq = 1.0 / (N ** (torch.arange(0, dim, 2) / dim))

            inv_freq = torch.cat((inv_freq, inv_freq), dim=–1)

            place = torch.arange(max_position_embeddings)

            sinusoid_inp = torch.outer(place, inv_freq)

            self.register_buffer(“cos”, sinusoid_inp.cos())

            self.register_buffer(“sin”, sinusoid_inp.sin())

     

        def ahead(self, x: Tensor) -> Tensor:

            batch_size, seq_len, num_heads, head_dim = x.form

            gadget = x.gadget

            dtype = x.dtype

            cos = self.cos.to(gadget, dtype)[:seq_len].view(1, seq_len, 1, –1)

            sin = self.sin.to(gadget, dtype)[:seq_len].view(1, seq_len, 1, –1)

            x1, x2 = x.chunk(2, dim=–1)

            rotated = torch.cat((–x2, x1), dim=–1)

            return (x * cos) + (rotated * sin)

     

    class LlamaAttention(nn.Module):

        “”“Grouped-query consideration with rotary embeddings.”“”

     

        def __init__(self, config: LlamaConfig) -> None:

            tremendous().__init__()

            self.hidden_size = config.hidden_size

            self.num_heads = config.num_attention_heads

            self.head_dim = self.hidden_size // self.num_heads

            self.num_kv_heads = config.num_key_value_heads

            assert (self.head_dim * self.num_heads) == self.hidden_size

     

            self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False)

            self.k_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=False)

            self.v_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=False)

            self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False)

     

        def ahead(self, hidden_states: Tensor, rope: RotaryPositionEncoding) -> Tensor:

            bs, seq_len, dim = hidden_states.dimension()

     

            query_states = self.q_proj(hidden_states).view(bs, seq_len, self.num_heads, self.head_dim)

            key_states = self.k_proj(hidden_states).view(bs, seq_len, self.num_kv_heads, self.head_dim)

            value_states = self.v_proj(hidden_states).view(bs, seq_len, self.num_kv_heads, self.head_dim)

     

            attn_output = F.scaled_dot_product_attention(

                rope(query_states).transpose(1, 2),

                rope(key_states).transpose(1, 2),

                value_states.transpose(1, 2),

                is_causal=True,

                dropout_p=0.0,

                enable_gqa=True,

            )

     

            attn_output = attn_output.transpose(1, 2).reshape(bs, seq_len, self.hidden_size)

            return self.o_proj(attn_output)

     

    class LlamaMLP(nn.Module):

        “”“Feed-forward community with SwiGLU activation.”“”

     

        def __init__(self, config: LlamaConfig) -> None:

            tremendous().__init__()

            self.gate_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False)

            self.up_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False)

            self.act_fn = F.silu

            self.down_proj = nn.Linear(config.intermediate_size, config.hidden_size, bias=False)

     

        def ahead(self, x: Tensor) -> Tensor:

            gate = self.act_fn(self.gate_proj(x))

            up = self.up_proj(x)

            return self.down_proj(gate * up)

     

    class LlamaDecoderLayer(nn.Module):

        “”“Single transformer layer for a Llama mannequin.”“”

     

        def __init__(self, config: LlamaConfig) -> None:

            tremendous().__init__()

            self.input_layernorm = nn.RMSNorm(config.hidden_size, eps=1e–5)

            self.self_attn = LlamaAttention(config)

            self.post_attention_layernorm = nn.RMSNorm(config.hidden_size, eps=1e–5)

            self.mlp = LlamaMLP(config)

     

        def ahead(self, hidden_states: Tensor, rope: RotaryPositionEncoding) -> Tensor:

            residual = hidden_states

            hidden_states = self.input_layernorm(hidden_states)

            attn_outputs = self.self_attn(hidden_states, rope=rope)

            hidden_states = attn_outputs + residual

     

            residual = hidden_states

            hidden_states = self.post_attention_layernorm(hidden_states)

            return self.mlp(hidden_states) + residual

     

    class LlamaModel(nn.Module):

        “”“The complete Llama mannequin with none pretraining heads.”“”

     

        def __init__(self, config: LlamaConfig) -> None:

            tremendous().__init__()

            self.rotary_emb = RotaryPositionEncoding(

                config.hidden_size // config.num_attention_heads,

                config.max_position_embeddings,

            )

     

            self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size)

            self.layers = nn.ModuleList([

                LlamaDecoderLayer(config) for _ in range(config.num_hidden_layers)

            ])

            self.norm = nn.RMSNorm(config.hidden_size, eps=1e–5)

     

        def ahead(self, input_ids: Tensor) -> Tensor:

            hidden_states = self.embed_tokens(input_ids)

            for layer in self.layers:

                hidden_states = layer(hidden_states, rope=self.rotary_emb)

            return self.norm(hidden_states)

     

    class LlamaForPretraining(nn.Module):

        def __init__(self, config: LlamaConfig) -> None:

            tremendous().__init__()

            self.base_model = LlamaModel(config)

            self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

     

        def ahead(self, input_ids: Tensor) -> Tensor:

            hidden_states = self.base_model(input_ids)

            return self.lm_head(hidden_states)

     

     

    def apply_repetition_penalty(logits: Tensor, tokens: listing[int], penalty: float) -> Tensor:

        “”“Apply repetition penalty to the logits.”“”

        for tok in tokens:

            if logits[tok] > 0:

                logits[tok] /= penalty

            else:

                logits[tok] *= penalty

        return logits

     

     

    @torch.no_grad()

    def generate(mannequin, tokenizer, immediate, max_tokens=100, temperature=1.0, repetition_penalty=1.0,

                 repetition_penalty_range=10, top_k=50, gadget=None) -> str:

        “”“Generate textual content autoregressively from a immediate.

     

        Args:

            mannequin: The educated LlamaForPretraining mannequin

            tokenizer: The tokenizer

            immediate: Enter textual content immediate

            max_tokens: Most variety of tokens to generate

            temperature: Sampling temperature (larger = extra random)

            repetition_penalty: Penalty for repeating tokens

            repetition_penalty_range: Variety of earlier tokens to think about for repetition penalty

            top_k: Solely pattern from prime okay most probably tokens

            gadget: System the mannequin is loaded on

     

        Returns:

            Generated textual content

        ““”

        # Flip mannequin to analysis mode: Norm layer will work otherwise

        mannequin.eval()

     

        # Get particular token IDs

        bot_id = tokenizer.token_to_id(“[BOT]”)

        eot_id = tokenizer.token_to_id(“[EOT]”)

     

        # Tokenize the immediate into integer tensor

        prompt_tokens = [bot_id] + tokenizer.encode(” “ + immediate).ids

        input_ids = torch.tensor([prompt_tokens], dtype=torch.int64, gadget=gadget)

     

        # Recursively generate tokens

        generated_tokens = []

        for _step in vary(max_tokens):

            # Ahead go by means of mannequin

            logits = mannequin(input_ids)

     

            # Get logits for the final token

            next_token_logits = logits[0, –1, :] / temperature

     

            # Apply repetition penalty

            if repetition_penalty != 1.0 and len(generated_tokens) > 0:

                next_token_logits = apply_repetition_penalty(

                    next_token_logits,

                    generated_tokens[–repetition_penalty_range:],

                    repetition_penalty,

                )

     

            # Apply top-k filtering

            if top_k > 0:

                top_k_logits = torch.topk(next_token_logits, top_k)[0]

                indices_to_remove = next_token_logits < top_k_logits[–1]

                next_token_logits[indices_to_remove] = float(“-inf”)

     

            # Pattern from the filtered distribution

            probs = F.softmax(next_token_logits, dim=–1)

            next_token = torch.multinomial(probs, num_samples=1)

     

            # Early cease if EOT token is generated

            if next_token.merchandise() == eot_id:

                break

     

            # Append the brand new token to input_ids for subsequent iteration

            input_ids = torch.cat([input_ids, next_token.unsqueeze(0)], dim=1)

            generated_tokens.append(next_token.merchandise())

     

        # Decode all generated tokens

        return tokenizer.decode(generated_tokens)

     

     

    checkpoint = “llama_model_final.pth”   # saved mannequin checkpoint

    tokenizer = “bpe_50K.json”   # saved tokenizer

    max_tokens = 100

    temperature = 0.9

    top_k = 50

    penalty = 1.1

    penalty_range = 10

     

    # Load tokenizer and mannequin

    gadget = torch.gadget(“cuda” if torch.cuda.is_available() else “cpu”)

    tokenizer = tokenizers.Tokenizer.from_file(tokenizer)

    config = LlamaConfig()

    mannequin = LlamaForPretraining(config).to(gadget)

    mannequin.load_state_dict(torch.load(checkpoint, map_location=gadget))

     

    immediate = “As soon as upon a time, there was”

    response = generate(

        mannequin=mannequin,

        tokenizer=tokenizer,

        immediate=immediate,

        max_tokens=max_tokens,

        temperature=temperature,

        top_k=top_k,

        repetition_penalty=penalty,

        repetition_penalty_range=penalty_range,

        gadget=gadget,

    )

    print(immediate)

    print(“-“ * 20)

    print(response)

    Share. Facebook Twitter Pinterest LinkedIn Tumblr Email
    Oliver Chambers
    • Website

    Related Posts

    Setting Up a Google Colab AI-Assisted Coding Surroundings That Really Works

    March 12, 2026

    We ran 16 AI Fashions on 9,000+ Actual Paperwork. Here is What We Discovered.

    March 12, 2026

    Quick Paths and Sluggish Paths – O’Reilly

    March 11, 2026
    Top Posts

    Evaluating the Finest AI Video Mills for Social Media

    April 18, 2025

    Utilizing AI To Repair The Innovation Drawback: The Three Step Resolution

    April 18, 2025

    Midjourney V7: Quicker, smarter, extra reasonable

    April 18, 2025

    Meta resumes AI coaching utilizing EU person knowledge

    April 18, 2025
    Don't Miss

    AI use is altering how a lot firms pay for cyber insurance coverage

    By Declan MurphyMarch 12, 2026

    In July 2025, McDonald’s had an surprising downside on the menu, one involving McHire, its…

    AI-Powered Cybercrime Is Surging. The US Misplaced $16.6 Billion in 2024.

    March 12, 2026

    Setting Up a Google Colab AI-Assisted Coding Surroundings That Really Works

    March 12, 2026

    Pricing Breakdown and Core Characteristic Overview

    March 12, 2026
    Stay In Touch
    • Facebook
    • Twitter
    • Pinterest
    • Instagram
    • YouTube
    • Vimeo

    Subscribe to Updates

    Get the latest creative news from SmartMag about art & design.

    UK Tech Insider
    Facebook X (Twitter) Instagram
    • About Us
    • Contact Us
    • Privacy Policy
    • Terms Of Service
    • Our Authors
    © 2026 UK Tech Insider. All rights reserved by UK Tech Insider.

    Type above and press Enter to search. Press Esc to cancel.