Close Menu
    Main Menu
    • Home
    • News
    • Tech
    • Robotics
    • ML & Research
    • AI
    • Digital Transformation
    • AI Ethics & Regulation
    • Thought Leadership in AI

    Subscribe to Updates

    Get the latest creative news from FooBar about art, design and business.

    What's Hot

    High 7 AI Agent Orchestration Frameworks

    March 12, 2026

    iRobot is bringing the Roomba Mini to the U.Ok. and Europe

    March 12, 2026

    AI use is altering how a lot firms pay for cyber insurance coverage

    March 12, 2026
    Facebook X (Twitter) Instagram
    UK Tech InsiderUK Tech Insider
    Facebook X (Twitter) Instagram
    UK Tech InsiderUK Tech Insider
    Home»Machine Learning & Research»A Gentle Introduction to Language Model Fine-tuning
    Machine Learning & Research

    A Gentle Introduction to Language Model Fine-tuning

    Oliver ChambersBy Oliver ChambersJanuary 11, 2026No Comments4 Mins Read
    Facebook Twitter Pinterest Telegram LinkedIn Tumblr Email Reddit
    A Gentle Introduction to Language Model Fine-tuning
    Share
    Facebook Twitter LinkedIn Pinterest Email Copy Link


import dataclasses

import tokenizers
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor

     

     

    # Mannequin structure similar as coaching script

    @dataclasses.dataclass

    class LlamaConfig:

        “”“Outline Llama mannequin hyperparameters.”“”

        vocab_size: int = 50000

        max_position_embeddings: int = 2048

        hidden_size: int = 768

        intermediate_size: int = 4*768

        num_hidden_layers: int = 12

        num_attention_heads: int = 12

        num_key_value_heads: int = 3

     

    class RotaryPositionEncoding(nn.Module):

        “”“Rotary place encoding.”“”

     

        def __init__(self, dim: int, max_position_embeddings: int) -> None:

            tremendous().__init__()

            self.dim = dim

            self.max_position_embeddings = max_position_embeddings

            N = 10_000.0

            inv_freq = 1.0 / (N ** (torch.arange(0, dim, 2) / dim))

            inv_freq = torch.cat((inv_freq, inv_freq), dim=–1)

            place = torch.arange(max_position_embeddings)

            sinusoid_inp = torch.outer(place, inv_freq)

            self.register_buffer(“cos”, sinusoid_inp.cos())

            self.register_buffer(“sin”, sinusoid_inp.sin())

     

        def ahead(self, x: Tensor) -> Tensor:

            batch_size, seq_len, num_heads, head_dim = x.form

            gadget = x.gadget

            dtype = x.dtype

            cos = self.cos.to(gadget, dtype)[:seq_len].view(1, seq_len, 1, –1)

            sin = self.sin.to(gadget, dtype)[:seq_len].view(1, seq_len, 1, –1)

            x1, x2 = x.chunk(2, dim=–1)

            rotated = torch.cat((–x2, x1), dim=–1)

            return (x * cos) + (rotated * sin)

     

    class LlamaAttention(nn.Module):

        “”“Grouped-query consideration with rotary embeddings.”“”

     

        def __init__(self, config: LlamaConfig) -> None:

            tremendous().__init__()

            self.hidden_size = config.hidden_size

            self.num_heads = config.num_attention_heads

            self.head_dim = self.hidden_size // self.num_heads

            self.num_kv_heads = config.num_key_value_heads

            assert (self.head_dim * self.num_heads) == self.hidden_size

     

            self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False)

            self.k_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=False)

            self.v_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=False)

            self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False)

     

        def ahead(self, hidden_states: Tensor, rope: RotaryPositionEncoding) -> Tensor:

            bs, seq_len, dim = hidden_states.dimension()

     

            query_states = self.q_proj(hidden_states).view(bs, seq_len, self.num_heads, self.head_dim)

            key_states = self.k_proj(hidden_states).view(bs, seq_len, self.num_kv_heads, self.head_dim)

            value_states = self.v_proj(hidden_states).view(bs, seq_len, self.num_kv_heads, self.head_dim)

     

            attn_output = F.scaled_dot_product_attention(

                rope(query_states).transpose(1, 2),

                rope(key_states).transpose(1, 2),

                value_states.transpose(1, 2),

                is_causal=True,

                dropout_p=0.0,

                enable_gqa=True,

            )

     

            attn_output = attn_output.transpose(1, 2).reshape(bs, seq_len, self.hidden_size)

            return self.o_proj(attn_output)

     

    class LlamaMLP(nn.Module):

        “”“Feed-forward community with SwiGLU activation.”“”

     

        def __init__(self, config: LlamaConfig) -> None:

            tremendous().__init__()

            self.gate_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False)

            self.up_proj = nn.Linear(config.hidden_size, config.intermediate_size, bias=False)

            self.act_fn = F.silu

            self.down_proj = nn.Linear(config.intermediate_size, config.hidden_size, bias=False)

     

        def ahead(self, x: Tensor) -> Tensor:

            gate = self.act_fn(self.gate_proj(x))

            up = self.up_proj(x)

            return self.down_proj(gate * up)

     

    class LlamaDecoderLayer(nn.Module):

        “”“Single transformer layer for a Llama mannequin.”“”

     

        def __init__(self, config: LlamaConfig) -> None:

            tremendous().__init__()

            self.input_layernorm = nn.RMSNorm(config.hidden_size, eps=1e–5)

            self.self_attn = LlamaAttention(config)

            self.post_attention_layernorm = nn.RMSNorm(config.hidden_size, eps=1e–5)

            self.mlp = LlamaMLP(config)

     

        def ahead(self, hidden_states: Tensor, rope: RotaryPositionEncoding) -> Tensor:

            residual = hidden_states

            hidden_states = self.input_layernorm(hidden_states)

            attn_outputs = self.self_attn(hidden_states, rope=rope)

            hidden_states = attn_outputs + residual

     

            residual = hidden_states

            hidden_states = self.post_attention_layernorm(hidden_states)

            return self.mlp(hidden_states) + residual

     

    class LlamaModel(nn.Module):

        “”“The complete Llama mannequin with none pretraining heads.”“”

     

        def __init__(self, config: LlamaConfig) -> None:

            tremendous().__init__()

            self.rotary_emb = RotaryPositionEncoding(

                config.hidden_size // config.num_attention_heads,

                config.max_position_embeddings,

            )

     

            self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size)

            self.layers = nn.ModuleList([

                LlamaDecoderLayer(config) for _ in range(config.num_hidden_layers)

            ])

            self.norm = nn.RMSNorm(config.hidden_size, eps=1e–5)

     

        def ahead(self, input_ids: Tensor) -> Tensor:

            hidden_states = self.embed_tokens(input_ids)

            for layer in self.layers:

                hidden_states = layer(hidden_states, rope=self.rotary_emb)

            return self.norm(hidden_states)

     

    class LlamaForPretraining(nn.Module):

        def __init__(self, config: LlamaConfig) -> None:

            tremendous().__init__()

            self.base_model = LlamaModel(config)

            self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

     

        def ahead(self, input_ids: Tensor) -> Tensor:

            hidden_states = self.base_model(input_ids)

            return self.lm_head(hidden_states)

     

     

    def apply_repetition_penalty(logits: Tensor, tokens: listing[int], penalty: float) -> Tensor:

        “”“Apply repetition penalty to the logits.”“”

        for tok in tokens:

            if logits[tok] > 0:

                logits[tok] /= penalty

            else:

                logits[tok] *= penalty

        return logits

     

     

    @torch.no_grad()

    def generate(mannequin, tokenizer, immediate, max_tokens=100, temperature=1.0, repetition_penalty=1.0,

                 repetition_penalty_range=10, top_k=50, gadget=None) -> str:

        “”“Generate textual content autoregressively from a immediate.

     

        Args:

            mannequin: The educated LlamaForPretraining mannequin

            tokenizer: The tokenizer

            immediate: Enter textual content immediate

            max_tokens: Most variety of tokens to generate

            temperature: Sampling temperature (larger = extra random)

            repetition_penalty: Penalty for repeating tokens

            repetition_penalty_range: Variety of earlier tokens to think about for repetition penalty

            top_k: Solely pattern from prime okay most probably tokens

            gadget: System the mannequin is loaded on

     

        Returns:

            Generated textual content

        ““”

        # Flip mannequin to analysis mode: Norm layer will work otherwise

        mannequin.eval()

     

        # Get particular token IDs

        bot_id = tokenizer.token_to_id(“[BOT]”)

        eot_id = tokenizer.token_to_id(“[EOT]”)

     

        # Tokenize the immediate into integer tensor

        prompt_tokens = [bot_id] + tokenizer.encode(” “ + immediate).ids

        input_ids = torch.tensor([prompt_tokens], dtype=torch.int64, gadget=gadget)

     

        # Recursively generate tokens

        generated_tokens = []

        for _step in vary(max_tokens):

            # Ahead go by means of mannequin

            logits = mannequin(input_ids)

     

            # Get logits for the final token

            next_token_logits = logits[0, –1, :] / temperature

     

            # Apply repetition penalty

            if repetition_penalty != 1.0 and len(generated_tokens) > 0:

                next_token_logits = apply_repetition_penalty(

                    next_token_logits,

                    generated_tokens[–repetition_penalty_range:],

                    repetition_penalty,

                )

     

            # Apply top-k filtering

            if top_k > 0:

                top_k_logits = torch.topk(next_token_logits, top_k)[0]

                indices_to_remove = next_token_logits < top_k_logits[–1]

                next_token_logits[indices_to_remove] = float(“-inf”)

     

            # Pattern from the filtered distribution

            probs = F.softmax(next_token_logits, dim=–1)

            next_token = torch.multinomial(probs, num_samples=1)

     

            # Early cease if EOT token is generated

            if next_token.merchandise() == eot_id:

                break

     

            # Append the brand new token to input_ids for subsequent iteration

            input_ids = torch.cat([input_ids, next_token.unsqueeze(0)], dim=1)

            generated_tokens.append(next_token.merchandise())

     

        # Decode all generated tokens

        return tokenizer.decode(generated_tokens)

     

     

    checkpoint = “llama_model_final.pth”   # saved mannequin checkpoint

    tokenizer = “bpe_50K.json”   # saved tokenizer

    max_tokens = 100

    temperature = 0.9

    top_k = 50

    penalty = 1.1

    penalty_range = 10

     

    # Load tokenizer and mannequin

    gadget = torch.gadget(“cuda” if torch.cuda.is_available() else “cpu”)

    tokenizer = tokenizers.Tokenizer.from_file(tokenizer)

    config = LlamaConfig()

    mannequin = LlamaForPretraining(config).to(gadget)

    mannequin.load_state_dict(torch.load(checkpoint, map_location=gadget))

     

    immediate = “As soon as upon a time, there was”

    response = generate(

        mannequin=mannequin,

        tokenizer=tokenizer,

        immediate=immediate,

        max_tokens=max_tokens,

        temperature=temperature,

        top_k=top_k,

        repetition_penalty=penalty,

        repetition_penalty_range=penalty_range,

        gadget=gadget,

    )

    print(immediate)

    print(“-“ * 20)

    print(response)

    Share. Facebook Twitter Pinterest LinkedIn Tumblr Email
    Oliver Chambers
    • Website

    Related Posts

    High 7 AI Agent Orchestration Frameworks

    March 12, 2026

    Setting Up a Google Colab AI-Assisted Coding Surroundings That Really Works

    March 12, 2026

    We ran 16 AI Fashions on 9,000+ Actual Paperwork. Here is What We Discovered.

    March 12, 2026
    Top Posts

    Evaluating the Finest AI Video Mills for Social Media

    April 18, 2025

    Utilizing AI To Repair The Innovation Drawback: The Three Step Resolution

    April 18, 2025

    Midjourney V7: Quicker, smarter, extra reasonable

    April 18, 2025

    Meta resumes AI coaching utilizing EU person knowledge

    April 18, 2025
    Don't Miss

    High 7 AI Agent Orchestration Frameworks

    By Oliver ChambersMarch 12, 2026

    Picture by Writer   # Introduction  AI brokers assist construct autonomous programs that may plan, use…

    iRobot is bringing the Roomba Mini to the U.Ok. and Europe

    March 12, 2026

    AI use is altering how a lot firms pay for cyber insurance coverage

    March 12, 2026

    AI-Powered Cybercrime Is Surging. The US Misplaced $16.6 Billion in 2024.

    March 12, 2026
    Stay In Touch
    • Facebook
    • Twitter
    • Pinterest
    • Instagram
    • YouTube
    • Vimeo

    Subscribe to Updates

    Get the latest creative news from SmartMag about art & design.

    UK Tech Insider
    Facebook X (Twitter) Instagram
    • About Us
    • Contact Us
    • Privacy Policy
    • Terms Of Service
    • Our Authors
    © 2026 UK Tech Insider. All rights reserved by UK Tech Insider.

    Type above and press Enter to search. Press Esc to cancel.