mirror of
https://github.com/BerriAI/litellm.git
synced 2025-12-06 11:33:26 +08:00
* Fix: Properly close aiohttp client sessions to prevent resource leaks (#12107) - Add close() method to BaseLLMAIOHTTPHandler to properly close aiohttp ClientSession - Create async_client_cleanup module with utility functions to close all cached async clients - Register automatic cleanup at exit via atexit hook - Export close_litellm_async_clients() function for manual cleanup - Add comprehensive tests to verify resource cleanup This fixes the "Unclosed client session" and "Unclosed connector" warnings when using acompletion with Gemini and other models that use aiohttp. Fixes #12107 * Fix: Remove unused import to satisfy linter * Fix: Extend cleanup to handle AsyncHTTPHandler instances used by Gemini The original implementation only cleaned up BaseLLMAIOHTTPHandler instances, but Gemini/Vertex AI providers use AsyncHTTPHandler objects which contain httpx clients with aiohttp transports. This commit extends the cleanup function to: - Handle AsyncHTTPHandler instances by accessing their internal client - Close both the aiohttp transport and httpx client - Add generic fallback for any objects with aclose method This properly fixes the resource leak warnings for all provider types.
117 lines
3.7 KiB
Python
117 lines
3.7 KiB
Python
"""
|
|
Test that async HTTP clients are properly cleaned up to prevent resource leaks.
|
|
Issue: https://github.com/BerriAI/litellm/issues/12107
|
|
"""
|
|
import asyncio
|
|
import os
|
|
import warnings
|
|
|
|
import pytest
|
|
|
|
import litellm
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_acompletion_resource_cleanup():
    """Check that one acompletion call plus explicit cleanup leaves no leak warnings.

    The call is made with a ``mock_response``; afterwards the recorded
    warnings are scanned for "Unclosed" client-session / connector messages.
    """
    expected_reply = "Hi there! How can I help you today?"

    # Record every warning raised inside this block so it can be inspected below.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")

        # Issue a single async completion request.
        reply = await litellm.acompletion(
            model="gemini/gemini-2.0-flash-lite-001",
            messages=[{"role": "user", "content": "Hello"}],
            mock_response="Hi there! How can I help you today?",
        )

        # The mocked text must come back unchanged.
        assert reply.choices[0].message.content == expected_reply

        # Explicitly close the async clients.
        await litellm.close_litellm_async_clients()

        # Short pause so that any late warnings are still recorded.
        await asyncio.sleep(0.1)

        # Collect the text of every warning that reports an unclosed
        # client session or connector.
        leak_messages = []
        for entry in caught:
            text = str(entry.message)
            if "Unclosed" not in text:
                continue
            if "client session" in text or "connector" in text:
                leak_messages.append(text)

        # No such warning may have been recorded.
        assert not leak_messages, f"Found unclosed resources: {leak_messages}"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_multiple_acompletion_calls_cleanup():
    """Check that several sequential acompletion calls leave no leak warnings.

    Three calls are made one after another (each with its own
    ``mock_response``), the async clients are closed once, and the recorded
    warnings are scanned for "Unclosed" client-session / connector messages.
    """
    call_count = 3

    with warnings.catch_warnings(record=True) as recorded:
        warnings.simplefilter("always")

        # Issue the calls sequentially, checking each mocked reply.
        for index in range(call_count):
            expected = f"Response {index}"
            result = await litellm.acompletion(
                model="gemini/gemini-2.0-flash-lite-001",
                messages=[{"role": "user", "content": f"Hello {index}"}],
                mock_response=f"Response {index}",
            )
            assert result.choices[0].message.content == expected

        # Close the async clients once, after all calls have finished.
        await litellm.close_litellm_async_clients()

        # Short pause so that any late warnings are still recorded.
        await asyncio.sleep(0.1)

        # Keep only the messages that report an unclosed session or connector.
        texts = (str(item.message) for item in recorded)
        leaks = [
            text
            for text in texts
            if "Unclosed" in text
            and ("client session" in text or "connector" in text)
        ]

        assert not leaks, f"Found unclosed resources: {leaks}"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_cleanup_function_is_safe_to_call_multiple_times():
    """Check that repeated cleanup calls raise nothing and acompletion still works after."""
    # Three back-to-back cleanups; none of them may raise.
    for _ in range(3):
        await litellm.close_litellm_async_clients()

    # A completion issued after the repeated cleanups must still succeed.
    expected = "Hi!"
    reply = await litellm.acompletion(
        model="gemini/gemini-2.0-flash-lite-001",
        messages=[{"role": "user", "content": "Hello"}],
        mock_response="Hi!",
    )
    assert reply.choices[0].message.content == expected

    # Final cleanup after the completion call.
    await litellm.close_litellm_async_clients()
|
|
|
|
|
|
if __name__ == "__main__":
    # Run every test defined in this module, not only the first one, so the
    # success message printed below is accurate when the file is executed
    # directly. All tests share a single event loop via one asyncio.run call.
    async def _run_all_tests():
        await test_acompletion_resource_cleanup()
        await test_multiple_acompletion_calls_cleanup()
        await test_cleanup_function_is_safe_to_call_multiple_times()

    asyncio.run(_run_all_tests())
    print("✅ All tests passed!")
|