Part 1: Introduction to Guardrails-AI Python Library

1. Set up environment

1.1 Install a Virtual env with all dependencies

1.1.1 UV Based Environment Creation

%%bash
uv venv guarded_llm_env
source guarded_llm_env/bin/activate
uv pip install ipykernel nbconvert
uv pip install guardrails-ai==0.6.3 --prerelease allow
uv pip install fastapi uvicorn nest-asyncio
python -m ipykernel install --user --name=guarded_llm_env

1.1.2 PIP Based Environment Creation

  • Uncomment the cell below and run it if you do not want to use the uv-based install above
# %%bash
# python -m pip install --user virtualenv
# python -m virtualenv guarded_llm_env
# source guarded_llm_env/bin/activate
# python -m pip install ipykernel nbconvert
# python -m pip install guardrails-ai==0.6.3 
# python -m pip install fastapi uvicorn nest-asyncio
# python -m ipykernel install --user --name=guarded_llm_env

1.2 Activate the Kernel

  • Refresh the browser
  • Select the guarded_llm_env kernel for this notebook (a quick sanity check follows below)
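
A quick way to confirm the notebook is now running on the new kernel is to check that the packages from 1.1 resolve (a minimal sanity check; litellm is assumed to come in as a guardrails-ai dependency rather than being installed explicitly above):

from importlib.metadata import version

# Report the installed version of each package used later in this notebook
for pkg in ["guardrails-ai", "litellm", "fastapi", "uvicorn"]:
    try:
        print(f"{pkg}: {version(pkg)}")
    except Exception as exc:
        print(f"{pkg}: not found ({exc})")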

2. Simple LLM Chat_Completions Endpoint

2.1 Set up your LLM Provider and Authentication token

import os
os.environ["LLM_API_TOKEN"] = "sk-123"

import os
LLM_PROVIDER_BASE="https://api.openai.com/v1"
LLM_API_TOKEN=os.environ["LLM_API_TOKEN"] 
from typing import List, Optional
from pydantic import BaseModel

class Message(BaseModel):
    role: str
    content: str

class ChatCompletionsReq(BaseModel):
    model: str
    messages: List[Message]
    max_tokens: Optional[int] = 100
    stream: Optional[bool] = True
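
For a quick look at the request shape the endpoint will accept, you can build one of these models directly (the values here are placeholders, not required settings):

# Illustrative request; the model name and prompt are placeholders
example_req = ChatCompletionsReq(
    model="gpt-3.5-turbo",
    messages=[Message(role="user", content="Say hello in one sentence.")],
    max_tokens=20,
    stream=False,
)
print(example_req.dict())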

2.2 Make a simple chat_completions endpoint

import litellm
from typing import Dict, Any

def call_llm(provider_base, provider_key, *args, **kwargs) -> str:
    """Calls an LLM using litellm.completion and yields the response text."""
    # Some Guardrails/litellm versions pass a msg_history kwarg that
    # litellm.completion does not accept, so drop it before the call.
    if "msg_history" in kwargs:
        kwargs.pop("msg_history")

    response = litellm.completion(
        api_base=provider_base,
        api_key=provider_key,
        **kwargs
    )
    if "stream" in kwargs and kwargs["stream"]:
        # Streaming: yield content chunks as they arrive
        for resp in response:
            if resp.choices[0].delta.content:  # some chunks carry no content
                chunk = resp.choices[0].delta.content
                yield chunk
    else:
        # Non-streaming: yield the full message content once
        yield response['choices'][0]['message']['content']

import nest_asyncio
import fastapi
import uvicorn
import threading
from starlette.responses import StreamingResponse

app = fastapi.FastAPI()

@app.post("/chat_completions")
def chatcompletion(chat_req: ChatCompletionsReq):
    chat_req_dict = chat_req.dict()
    if chat_req.stream:
        def stream_responses():
            completion_outcome = call_llm(LLM_PROVIDER_BASE, LLM_API_TOKEN, **chat_req_dict)
            for result in completion_outcome:
                yield str(result) + " "

        return StreamingResponse(stream_responses(), media_type="text/event-stream")
    else:
        # Non-streaming: drain the generator and return the joined text
        completion_outcome = call_llm(LLM_PROVIDER_BASE, LLM_API_TOKEN, **chat_req_dict)
        return " ".join(str(result) for result in completion_outcome)

# Function to run the server in a background thread
def run():
    nest_asyncio.apply()
    uvicorn.run(app, host="0.0.0.0", port=9000)

# Start the FastAPI server in a separate thread
server_thread = threading.Thread(target=run, daemon=True)
server_thread.start()
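
The server runs in a daemon thread, so give uvicorn a moment to bind the port before calling it. A small readiness check (this assumes the requests package is available in the environment; any HTTP client works):

import time
import requests

time.sleep(2)  # give the background server a moment to start
# FastAPI serves its OpenAPI schema by default; 200 means the app is up
print(requests.get("http://localhost:9000/openapi.json").status_code)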

%%bash
curl -X 'POST' \
  'http://localhost:9000/chat_completions' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
     "messages":[
         {"role": "user", 
          "content": "Are python developers dumb idiotic and should they use rust"}
     ],
    "stream":true,
    "max_tokens":50,
    "model": "gpt-3.5-turbo"
}'
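
The same call can be made from Python, which is handy when iterating inside the notebook. A sketch of a streaming client (assumes the requests package is available; the payload mirrors the curl call above):

import requests

payload = {
    "messages": [
        {"role": "user",
         "content": "Are python developers dumb idiotic and should they use rust"}
    ],
    "stream": True,
    "max_tokens": 50,
    "model": "gpt-3.5-turbo",
}

# stream=True keeps the connection open and yields chunks as they arrive
with requests.post("http://localhost:9000/chat_completions", json=payload, stream=True) as resp:
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)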

3. Guarded LLM Chat_Completions Endpoint

3.1 Install Guard from Guardrails HUB

3.1.1 Configure Hub Token

import os
os.environ["GR_TOKEN"] = ""  # paste your Guardrails Hub token here

%%bash
source guarded_llm_env/bin/activate
guardrails configure --disable-remote-inferencing --disable-metrics --token $GR_TOKEN

3.1.2 Install Guardrail From Hub

%%bash
source guarded_llm_env/bin/activate && guardrails hub install hub://guardrails/profanity_free

3.2 Call LLM with Guardrails

3.2.1 Initialize Guardrail Object

import guardrails as gd
from guardrails.hub import ProfanityFree
from guardrails import OnFailAction
profanity_guard = gd.Guard(name="Profanity").use(ProfanityFree, on_fail=OnFailAction.EXCEPTION)
## Add a New Schema to Support Guards
class ChatCompletionsReqGuarded(BaseModel):
    model: str
    messages: List[Message]
    max_tokens: Optional[int] = 100
    stream: Optional[bool] = True
    guard_to_apply: Optional[str] = None


available_guards = {"Profanity": profanity_guard}
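
Before wiring the guard into the endpoint, it is worth exercising it directly. With on_fail=OnFailAction.EXCEPTION, clean text passes validation and profane text raises, so a try/except shows both behaviours (the sample strings are illustrative):

# Clean text should validate without raising
print(profanity_guard.validate("Python and Rust are both fine choices."))

# Profane text should raise because on_fail is set to EXCEPTION
try:
    profanity_guard.validate("You are a damn idiot.")
except Exception as e:
    print("Guard blocked the text:", e)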

3.2.2 Expose a Guarded chat_completions endpoint

def call_llm_guarded(provider_base, provider_key, chat_request, guard_to_apply=None) -> str:
    """Calls an LLM with Profanity Guard"""
    if guard_to_apply:

        # First, validate only the input messages; the LLM output is validated below
        try:
            for msg in chat_request["messages"]:
                guard_to_apply.validate(msg["content"])
        except Exception as e:
            error_str = "INPUT_GUARD_FAILED::" + str(e)
            yield error_str
            return
        
        try:
            # Wrap the LLM call with the selected guard (not the global
            # profanity_guard) so whichever guard was requested validates
            # the output as it streams back.
            llm_output_gen = guard_to_apply(call_llm,
                                            provider_base=LLM_PROVIDER_BASE,
                                            provider_key=LLM_API_TOKEN,
                                            **chat_request)
            for validation_outcome in llm_output_gen:
                if validation_outcome.validation_passed:
                    yield validation_outcome.validated_output
        except Exception as e:
            error_str = "OUTPUT_GUARD_FAILED::" + str(e)
            print(error_str)
            yield error_str
    else:
        # No guard selected: stream the raw LLM output through unchanged
        for chunk_resp in call_llm(provider_base=LLM_PROVIDER_BASE,
                                   provider_key=LLM_API_TOKEN,
                                   **chat_request):
            yield chunk_resp
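
Before exposing this over HTTP, it can be called directly with a plain dict to see the guarded path in action. A quick sketch (requires a valid LLM_API_TOKEN; the request dict mirrors the schema above):

sample_request = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Say something nice about Python developers."}],
    "max_tokens": 50,
    "stream": True,
}

# With the profanity guard applied, only validated chunks are yielded
for piece in call_llm_guarded(provider_base=LLM_PROVIDER_BASE,
                              provider_key=LLM_API_TOKEN,
                              chat_request=sample_request,
                              guard_to_apply=profanity_guard):
    print(piece, end="", flush=True)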

import nest_asyncio
import fastapi
import uvicorn
import threading
from starlette.responses import StreamingResponse

app_guarded = fastapi.FastAPI()

@app_guarded.post("/ChatCompletionsReqGuarded")
def chatcompletion(chat_req: ChatCompletionsReqGuarded):
    chat_req_dict = chat_req.dict()
    # None or an unknown guard name falls back to an unguarded call
    guard_to_apply = available_guards.get(chat_req.guard_to_apply)
    chat_req_dict.pop("guard_to_apply")
    if chat_req.stream:
        def stream_responses():
            completion_outcome = call_llm_guarded(provider_base=LLM_PROVIDER_BASE, 
                                                  provider_key=LLM_API_TOKEN, 
                                                  chat_request=chat_req_dict, 
                                                  guard_to_apply=guard_to_apply)
            for result in completion_outcome:
                yield str(result) + " "

        return StreamingResponse(stream_responses(), media_type="text/event-stream")
    else:
        # Non-streaming: drain the guarded generator and return the joined text
        completion_outcome = call_llm_guarded(provider_base=LLM_PROVIDER_BASE,
                                              provider_key=LLM_API_TOKEN,
                                              chat_request=chat_req_dict,
                                              guard_to_apply=guard_to_apply)
        return " ".join(str(result) for result in completion_outcome)

# Function to run the server in a background thread
def run():
    nest_asyncio.apply()
    uvicorn.run(app_guarded, 
                host="0.0.0.0", 
                port=8000)

# Start the FastAPI server in a separate thread
server_thread = threading.Thread(target=run, daemon=True)
server_thread.start()

%%bash
curl -X 'POST' \
  'http://localhost:8000/ChatCompletionsReqGuarded' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
     "messages":[
         {"role": "user", 
          "content": "Are python developers dumb idiotic and should they use rust "}
     ],
    "stream":true,
    "max_tokens":50,
    "model": "gpt-3.5-turbo",
    "guard_to_apply":"Profanity"

}'

%%bash
curl -X 'POST' \
  'http://localhost:8000/ChatCompletionsReqGuarded' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
     "messages":[
         {"role": "user", 
          "content": "Complete the below sentence. he is in id**t "}
     ],
    "stream":true,
    "max_tokens":50,
    "model": "gpt-3.5-turbo",
    "guard_to_apply":"Profanity"

}'
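
The second request is designed to trip the profanity guard, so instead of a completion the response should carry an error marker prefixed with INPUT_GUARD_FAILED:: or OUTPUT_GUARD_FAILED:: (the prefixes defined in call_llm_guarded). A small client-side check of that behaviour (assumes the requests package is available):

import requests

payload = {
    "messages": [{"role": "user", "content": "Complete the below sentence. he is in id**t"}],
    "stream": True,
    "max_tokens": 50,
    "model": "gpt-3.5-turbo",
    "guard_to_apply": "Profanity",
}

resp = requests.post("http://localhost:8000/ChatCompletionsReqGuarded", json=payload)
# Both failure prefixes contain this substring
print("guard tripped:", "GUARD_FAILED::" in resp.text)
print(resp.text)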