{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "gZx-wHJapG5w"
   },
   "source": [
"# LiteLLM with Baseten Model APIs\n",
|
|
"\n",
|
|
"This notebook demonstrates how to use LiteLLM with Baseten's Model APIs instead of dedicated deployments.\n",
|
|
"\n",
|
|
"## Example Usage\n",
|
|
"```python\n",
|
|
"response = completion(\n",
|
|
" model=\"baseten/openai/gpt-oss-120b\",\n",
|
|
" messages=[{\"role\": \"user\", \"content\": \"Hello!\"}],\n",
|
|
" max_tokens=1000,\n",
|
|
" temperature=0.7\n",
|
|
")\n",
|
|
"```\n",
|
|
"\n",
|
|
"## Setup"
|
|
]
|
|
},
|
|
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "4JSRa0QVogPo"
   },
   "outputs": [],
   "source": [
    "%pip install litellm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "VEukLhDzo4vw"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "from litellm import completion\n",
    "\n",
    "# Set your Baseten API key\n",
    "os.environ['BASETEN_API_KEY'] = \"\" #@param {type:\"string\"}\n",
    "\n",
    "# Test message\n",
    "messages = [{\"role\": \"user\", \"content\": \"What is AGI?\"}]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "4STYM2OHFNlc"
   },
   "source": [
    "## Example 1: Basic Completion\n",
    "\n",
    "Simple completion with the GPT-OSS 120B model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "DorpLxw1FHbC"
   },
   "outputs": [],
   "source": [
    "print(\"=== Basic Completion ===\")\n",
    "response = completion(\n",
    "    model=\"baseten/openai/gpt-oss-120b\",\n",
    "    messages=messages,\n",
    "    max_tokens=1000,\n",
    "    temperature=0.7,\n",
    "    top_p=0.9,\n",
    "    presence_penalty=0.1,\n",
    "    frequency_penalty=0.1,\n",
    ")\n",
    "print(f\"Response: {response.choices[0].message.content}\")\n",
    "print(f\"Usage: {response.usage}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "syF3dTdKFSQQ"
   },
   "source": [
    "## Example 2: Streaming Completion\n",
    "\n",
    "Streaming completion with usage statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "rPgSoMlsojz0",
    "outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
   },
   "outputs": [],
   "source": [
"print(\"=== Streaming Completion ===\")\n",
|
|
"response = completion(\n",
|
|
" model=\"baseten/openai/gpt-oss-120b\",\n",
|
|
" messages=[{\"role\": \"user\", \"content\": \"Write a short poem about AI\"}],\n",
|
|
" stream=True,\n",
|
|
" max_tokens=500,\n",
|
|
" temperature=0.8,\n",
|
|
" stream_options={\n",
|
|
" \"include_usage\": True,\n",
|
|
" \"continuous_usage_stats\": True\n",
|
|
" },\n",
|
|
")\n",
|
|
"\n",
|
|
"print(\"Streaming response:\")\n",
|
|
"for chunk in response:\n",
|
|
" if chunk.choices and chunk.choices[0].delta.content:\n",
|
|
" print(chunk.choices[0].delta.content, end=\"\", flush=True)\n",
|
|
"print(\"\\n\")"
|
|
]
|
|
}
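  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example 3: Async Completion\n",
    "\n",
    "A minimal sketch using `litellm.acompletion`, the async counterpart to `completion()`, assuming the same `BASETEN_API_KEY` and model as above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from litellm import acompletion\n",
    "\n",
    "async def ask(prompt: str) -> str:\n",
    "    # acompletion mirrors completion() but returns an awaitable,\n",
    "    # so several requests can run concurrently if needed\n",
    "    response = await acompletion(\n",
    "        model=\"baseten/openai/gpt-oss-120b\",\n",
    "        messages=[{\"role\": \"user\", \"content\": prompt}],\n",
    "        max_tokens=500,\n",
    "    )\n",
    "    return response.choices[0].message.content\n",
    "\n",
    "# Notebook kernels run an event loop, so top-level await works here\n",
    "print(await ask(\"What is AGI?\"))"
   ]
  }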
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}