Source code for llmize.llm.llm_call

import time
import requests
import json
from google import genai

from ..utils.logger import log_info, log_warning, log_error
from ..utils.decorators import time_it

#@time_it

[docs]
def generate_content(client, model, prompt, temperature=None, max_retries=None, retry_delay=None):
    """
    Generate content using the specified language model.

    This function delegates the content generation to either the Gemini or Hugging Face model
    based on the provided model name.

    Parameters:
    - client: The API client used to interact with the language model.
    - model (str): The name of the language model to use.
    - prompt (str): The textual prompt to generate content from.
    - temperature (float, optional): Controls the creativity of the responses (default from config).
    - max_retries (int, optional): Maximum number of retry attempts in case of rate-limiting (default from config).
    - retry_delay (int, optional): Delay in seconds between retry attempts (default from config).

    Returns:
    - str: The generated content as a string, or None if the request was unsuccessful after retries.
    """
    # Import config here to avoid circular imports
    from ..config import get_config
    config = get_config()
    
    # Use config defaults if not provided
    if temperature is None:
        temperature = config.temperature
    if max_retries is None:
        max_retries = config.max_retries
    if retry_delay is None:
        retry_delay = config.retry_delay

    if model.startswith(("google/", "gemini", "gemma")):
        return generate_content_gemini(client, model, prompt, temperature, max_retries, retry_delay)
    elif model.startswith("openrouter/"):
        return generate_content_openrouter(client, model, prompt, temperature, max_retries, retry_delay)
    else:
        return generate_content_huggingface(client, model, prompt, temperature, max_retries, retry_delay)    



[docs]
def generate_content_gemini(client, model, prompt, temperature, max_retries=10, retry_delay=5):
    # Strip google/ prefix if present for backward compatibility
    if model.startswith("google/"):
        model = model[6:]  # Remove "google/" prefix
    
    generate_content_config = genai.types.GenerateContentConfig(temperature=temperature)
    
    for attempt in range(max_retries):
        try:
            response = client.models.generate_content(model=model, contents=prompt, config=generate_content_config)
            return response.text  # If the request is successful, return the response
        except Exception as e:
            if 'RESOURCE_EXHAUSTED' in str(e):  # You can check for rate-limiting specific message in the error
                log_warning(f"LLM rate limit hit, retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)  # Wait before retrying
            else:
                log_error(f"An error occurred: {e}")
                break  # If it's not a rate-limiting error, stop retrying
    log_warning("Max retries reached. Could not complete the request.")
    return None  # Return None if the max retries are reached and the request was unsuccessful



[docs]
def generate_content_huggingface(client, model, prompt, temperature, max_retries=10, retry_delay=5):
    messages = [
        { "role": "user", "content": prompt}
    ]
    for attempt in range(max_retries):
        try:
            completion = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature)
            response = completion.choices[0].message.content
            return response  # If the request is successful, return the response
        except Exception as e:
            if 'RESOURCE_EXHAUSTED' in str(e):  # You can check for rate-limiting specific message in the error
                log_warning(f"LLM rate limit hit, retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)  # Wait before retrying
            else:
                log_error(f"An error occurred: {e}")
                break  # If it's not a rate-limiting error, stop retrying
    log_warning("Max retries reached. Could not complete the request.")
    return None  # Return None if the max retries are reached and the request was unsuccessful



[docs]
def generate_content_openrouter(client, model, prompt, temperature, max_retries=10, retry_delay=5):
    """Generate content using OpenRouter API."""
    # Extract API key from client dict
    api_key = client["api_key"]
    
    # Prepare the request
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        # Optional headers for rankings on openrouter.ai
        "HTTP-Referer": "https://github.com/rizkiokt/llmize",
        "X-Title": "LLMize Optimization Library",
    }
    data = {
        "model": model,  # Use the full model name with openrouter/ prefix stripped
        "messages": [
            {
                "role": "user",
                "content": prompt
            }
        ],
        "temperature": temperature
    }
    
    # Strip the openrouter/ prefix for the API call
    if model.startswith("openrouter/"):
        data["model"] = model[11:]  # Remove "openrouter/" prefix
    
    for attempt in range(max_retries):
        try:
            response = requests.post(url, headers=headers, data=json.dumps(data))
            response.raise_for_status()
            result = response.json()
            return result["choices"][0]["message"]["content"]
        except Exception as e:
            if 'rate limit' in str(e).lower() or '429' in str(e):
                log_warning(f"OpenRouter rate limit hit, retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                log_error(f"An error occurred with OpenRouter: {e}")
                break
    log_warning("Max retries reached. Could not complete the request.")
    return None



[docs]
def generate_content_openai(client, model, prompt, temperature, max_retries=10, retry_delay=5):
    messages = [
        { "role": "user", "content": prompt}
    ]
    for attempt in range(max_retries):
        try:
            completion = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature)
            response = completion.choices[0].message.content
            return response  # If the request is successful, return the response
        except Exception as e:
            if 'RESOURCE_EXHAUSTED' in str(e):  # You can check for rate-limiting specific message in the error
                log_warning(f"LLM rate limit hit, retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)  # Wait before retrying
            else:
                log_error(f"An error occurred: {e}")
                break  # If it's not a rate-limiting error, stop retrying
    log_warning("Max retries reached. Could not complete the request.")
    
    return None  # Return None if the max retries are reached and the request was unsuccessful