Chat models are part of the Agentic SDK's LangChain integration. They provide a chat model interface that routes chat completion requests through watsonx Orchestrate using a consistent SDK interface. This interface aligns with the SDK runtime model for handling authentication, context, and API routing, and supports chat-based interactions, structured outputs, tool calling, and streaming responses. Chat models support the same API as LangChain's chat abstractions and can be used as a direct replacement for them when running inside Orchestrate.
From Instance Credentials (Standalone/Runs-Elsewhere Mode)
For standalone scripts or applications running outside the watsonx Orchestrate runtime:
PYTHON
from ibm_watsonx_orchestrate_sdk.langchain import ChatWxO

# Build a chat model for code that runs outside the watsonx Orchestrate runtime.
# The instance URL and API key below are placeholders; substitute your own
# values (and prefer loading the key from the environment or a secret store
# rather than hard-coding it).
llm = ChatWxO.from_instance_credentials(
    instance_url="https://your-instance.cloud.ibm.com",
    api_key="your-wxo-api-key",
    model="watsonx/meta-llama/llama-3-2-90b-vision-instruct",
    temperature=0.7,
    max_tokens=1000,
)

# Send a single prompt and print the text of the reply.
response = llm.invoke("Tell me a joke about programming")
print(response.content)
When running inside the watsonx Orchestrate runtime with an execution context:
PYTHON
from ibm_watsonx_orchestrate_sdk.langchain import ChatWxO

# Execution context provided by WxO runtime.
# NOTE: `runnable_config` is not defined in this snippet; it is assumed to be
# the config mapping handed to your code by the runtime. If the
# "configurable" or "execution_context" key is absent, this evaluates to None.
execution_context = runnable_config.get("configurable", {}).get("execution_context")

# Build the chat model from the runtime-supplied context instead of
# explicit instance credentials.
llm = ChatWxO.from_execution_context(
    execution_context=execution_context,
    model="watsonx/ibm/granite-3-8b-instruct",
    temperature=0.2,
)

# Send a single prompt and print the text of the reply.
response = llm.invoke("What is the capital of France?")
print(response.content)
from ibm_watsonx_orchestrate_sdk.langchain import ChatWxO

# Direct construction: pass the instance URL, API key, and model settings
# straight to the ChatWxO constructor. The URL and key are placeholders;
# substitute your own values.
llm = ChatWxO(
    instance_url="https://your-instance.cloud.ibm.com",
    api_key="your-wxo-api-key",
    model="watsonx/meta-llama/llama-3-2-90b-vision-instruct",
    temperature=0.7,
    max_tokens=1000,
)

# Send a single prompt and print the text of the reply.
response = llm.invoke("Tell me a joke about programming")
print(response.content)
import asyncio

# NOTE: `llm` is not created in this snippet; it is expected to be a ChatWxO
# instance built as shown in the initialization examples.

# Synchronous streaming: print each chunk as soon as it arrives.
# `end=""` keeps the chunks on one line and `flush=True` makes the partial
# output visible immediately instead of waiting for the buffer to fill.
for chunk in llm.stream("Write a short story about a robot"):
    print(chunk.content, end="", flush=True)


# Async streaming
async def stream_example():
    """Stream a reply chunk by chunk using the model's async iterator."""
    async for chunk in llm.astream("Explain photosynthesis"):
        print(chunk.content, end="", flush=True)


# Run the coroutine to completion on a fresh event loop.
asyncio.run(stream_example())
from typing import Literal

from pydantic import BaseModel, Field


# Each tool is described by a Pydantic model: the class docstring and the
# field descriptions are what get passed to `bind_tools` below.
class GetWeather(BaseModel):
    """Get the current weather for a location"""

    location: str = Field(description="City and state, e.g. San Francisco, CA")
    # A Literal annotation restricts the value to the two supported units and
    # is rendered as an `enum` in the generated JSON schema. This replaces the
    # extra `enum=` keyword on Field, which Pydantic v2 deprecates.
    unit: Literal["celsius", "fahrenheit"] = Field(description="Temperature unit")


class GetPopulation(BaseModel):
    """Get the population of a city"""

    location: str = Field(description="City and state, e.g. San Francisco, CA")


# NOTE: `llm` is not created in this snippet; it is expected to be a ChatWxO
# instance built as shown in the initialization examples.

# Bind tools to the model
llm_with_tools = llm.bind_tools([GetWeather, GetPopulation])

response = llm_with_tools.invoke("What's the weather and population in NYC?")

# Access tool calls: each entry is read here by its "name" and "args" keys.
for tool_call in response.tool_calls:
    print(f"Tool: {tool_call['name']}")
    print(f"Args: {tool_call['args']}")
from pydantic import BaseModel, Field


# Schema for the structured reply; the field descriptions are part of the
# schema handed to `with_structured_output` below.
class Person(BaseModel):
    """Information about a person"""

    name: str = Field(description="Person's full name")
    age: int = Field(description="Person's age in years")
    occupation: str = Field(description="Person's job or profession")
    hobbies: list[str] = Field(description="List of hobbies")


# NOTE: `llm` is not created in this snippet; it is expected to be a ChatWxO
# instance built as shown in the initialization examples.

# Create structured output model
structured_llm = llm.with_structured_output(Person)

# Get structured response: the result is used below as a `Person` instance.
person = structured_llm.invoke(
    "Tell me about a software engineer named Alice who is 28 years old "
    "and enjoys hiking, reading, and photography."
)

print(f"Name: {person.name}")
print(f"Age: {person.age}")
print(f"Occupation: {person.occupation}")
print(f"Hobbies: {', '.join(person.hobbies)}")