refactor: streamline API call parameters and enhance token management for OpenAI interactions
@@ -156,11 +156,16 @@ def setup_commands(bot: commands.Bot, db_handler, openai_client, image_generator
     ]

     # Send to the AI model
-    response = await openai_client.chat.completions.create(
-        model=model if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else "openai/gpt-4o",
-        messages=messages,
-        temperature=0.5
-    )
+    api_params = {
+        "model": model if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else "openai/gpt-4o",
+        "messages": messages
+    }
+
+    # Add temperature only for models that support it (exclude GPT-5 family)
+    if model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+        api_params["temperature"] = 0.5
+
+    response = await openai_client.chat.completions.create(**api_params)

     reply = response.choices[0].message.content
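
Note: every call site touched by this commit repeats the same guard — build a params dict, then attach temperature/top_p only when the model is outside the GPT-5 family. A minimal sketch of how that pattern could be consolidated into one helper; the name build_chat_params and the GPT5_MODELS set are hypothetical and not part of this change:

# Hypothetical helper (not in this commit): consolidates the repeated
# "omit sampling params for GPT-5 family" pattern used at each call site.
GPT5_MODELS = {"openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"}

def build_chat_params(model, messages, temperature=None, top_p=None, **extra):
    """Build kwargs for chat.completions.create, omitting sampling params for GPT-5 models."""
    params = {"model": model, "messages": messages, **extra}
    if model not in GPT5_MODELS:
        if temperature is not None:
            params["temperature"] = temperature
        if top_p is not None:
            params["top_p"] = top_p
    return params

# e.g. response = await openai_client.chat.completions.create(**build_chat_params(model, messages, temperature=0.5))
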
@@ -221,12 +226,17 @@ def setup_commands(bot: commands.Bot, db_handler, openai_client, image_generator
         {"role": "user", "content": f"Content from {url}:\n{content}"}
     ]

-    response = await openai_client.chat.completions.create(
-        model=model if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else "openai/gpt-4o",
-        messages=messages,
-        temperature=0.3,
-        top_p=0.7
-    )
+    api_params = {
+        "model": model if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else "openai/gpt-4o",
+        "messages": messages
+    }
+
+    # Add temperature and top_p only for models that support them (exclude GPT-5 family)
+    if model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+        api_params["temperature"] = 0.3
+        api_params["top_p"] = 0.7
+
+    response = await openai_client.chat.completions.create(**api_params)

    reply = response.choices[0].message.content
@@ -174,7 +174,7 @@ class MessageHandler:
         """Create a reusable aiohttp session for better performance"""
         if self.aiohttp_session is None or self.aiohttp_session.closed:
             self.aiohttp_session = aiohttp.ClientSession(
-                timeout=aiohttp.ClientTimeout(total=120),
+                timeout=aiohttp.ClientTimeout(total=240),
                 connector=aiohttp.TCPConnector(limit=20, ttl_dns_cache=300)
             )
@@ -857,6 +857,42 @@ class MessageHandler:
         history.append(current_message)
         messages_for_api = prepare_messages_for_api(history)

+        # Proactively trim history to avoid context overload while preserving system prompt
+        current_tokens = self._count_tokens(messages_for_api)
+        token_limit = MODEL_TOKEN_LIMITS.get(model, DEFAULT_TOKEN_LIMIT)
+        max_tokens = int(token_limit * 0.8)  # Use 80% of limit to leave room for response
+
+        if current_tokens > max_tokens:
+            logging.info(f"Proactively trimming history: {current_tokens} tokens > {max_tokens} limit for {model}")
+
+            if model in ["openai/o1-mini", "openai/o1-preview"]:
+                # For o1 models, trim the history without system prompt
+                trimmed_history_without_system = self._trim_history_to_token_limit(history_without_system, model, max_tokens)
+                messages_for_api = prepare_messages_for_api(trimmed_history_without_system)
+
+                # Update the history tracking
+                history_without_system = trimmed_history_without_system
+            else:
+                # For regular models, trim the full history (preserving system prompt)
+                trimmed_history = self._trim_history_to_token_limit(history, model, max_tokens)
+                messages_for_api = prepare_messages_for_api(trimmed_history)
+
+                # Update the history tracking
+                history = trimmed_history
+
+            # Save the trimmed history immediately to keep it in sync
+            if model in ["openai/o1-mini", "openai/o1-preview"]:
+                new_history = []
+                if system_content:
+                    new_history.append({"role": "system", "content": system_content})
+                new_history.extend(history_without_system[1:])  # Skip the "Instructions" message
+                await self.db.save_history(user_id, new_history)
+            else:
+                await self.db.save_history(user_id, history)
+
+            final_tokens = self._count_tokens(messages_for_api)
+            logging.info(f"History trimmed from {current_tokens} to {final_tokens} tokens")
+
         # Determine which models should have tools available
         # openai/o1-mini and openai/o1-preview do not support tools
         use_tools = model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat", "openai/o1", "openai/o3-mini", "openai/gpt-4.1", "openai/gpt-4.1-mini", "openai/gpt-4.1-nano", "openai/o3", "openai/o4-mini"]
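
The trimming block above calls self._count_tokens and self._trim_history_to_token_limit, whose implementations are not part of this diff. As a rough, assumed sketch only (the real methods on MessageHandler may differ), they could look like this, counting with tiktoken and dropping the oldest non-system messages until the history fits the budget:

# Assumed sketch of the helpers referenced above; MODEL_TOKEN_LIMITS and
# DEFAULT_TOKEN_LIMIT are the module-level constants used in the new code.
import tiktoken

def _count_tokens(self, messages):
    # Rough count: encode each message's string content with a cl100k_base tokenizer.
    enc = tiktoken.get_encoding("cl100k_base")
    total = 0
    for msg in messages:
        content = msg.get("content", "")
        if isinstance(content, str):
            total += len(enc.encode(content))
    return total

def _trim_history_to_token_limit(self, history, model, max_tokens=None):
    # Default the budget to 80% of the model's limit, matching the proactive path.
    if max_tokens is None:
        max_tokens = int(MODEL_TOKEN_LIMITS.get(model, DEFAULT_TOKEN_LIMIT) * 0.8)
    trimmed = list(history)
    # Drop the oldest non-system message until the history fits the budget.
    while len(trimmed) > 1 and self._count_tokens(trimmed) > max_tokens:
        drop_index = 1 if trimmed and trimmed[0].get("role") == "system" else 0
        trimmed.pop(drop_index)
    return trimmed
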
@@ -865,11 +901,18 @@ class MessageHandler:
         api_params = {
             "model": model,
             "messages": messages_for_api,
-            "temperature": 0.3 if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else 1,
-            "top_p": 0.7 if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else 1,
-            "timeout": 120  # Increased timeout for better response handling
+            "timeout": 240  # Increased timeout for better response handling
         }

+        # Add temperature and top_p only for models that support them (exclude GPT-5 family)
+        if model in ["openai/gpt-4o", "openai/gpt-4o-mini"]:
+            api_params["temperature"] = 0.3
+            api_params["top_p"] = 0.7
+        elif model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+            # For other models (not GPT-4o family and not GPT-5 family)
+            api_params["temperature"] = 1
+            api_params["top_p"] = 1
+
         # Add tools if using a supported model
         if use_tools:
             api_params["tools"] = get_tools_for_model()
@@ -879,59 +922,22 @@ class MessageHandler:
         chart_id = None
         image_urls = []  # Will store unique image URLs

-        # Make the initial API call without retry logic to avoid extra costs
+        # Make the initial API call
         try:
             response = await self.client.chat.completions.create(**api_params)
         except Exception as e:
-            # Handle 413 Request Entity Too Large error automatically
+            # Handle 413 Request Entity Too Large error with a user-friendly message
             if "413" in str(e) or "tokens_limit_reached" in str(e) or "Request body too large" in str(e):
-                logging.warning(f"Token limit exceeded for model {model}, automatically trimming history...")
-
-                # Trim the history to fit the model's token limit
-                current_tokens = self._count_tokens(messages_for_api)
-                logging.info(f"Current message tokens: {current_tokens}")
-
-                if model in ["openai/o1-mini", "openai/o1-preview"]:
-                    # For o1 models, use the trimmed history without system prompt
-                    trimmed_history_without_system = self._trim_history_to_token_limit(history_without_system, model)
-                    messages_for_api = prepare_messages_for_api(trimmed_history_without_system)
-                else:
-                    # For regular models, trim the full history
-                    trimmed_history = self._trim_history_to_token_limit(history, model)
-                    messages_for_api = prepare_messages_for_api(trimmed_history)
-
-                # Update API parameters with trimmed messages
-                api_params["messages"] = messages_for_api
-
-                # Save the trimmed history to prevent this issue in the future
-                if model in ["openai/o1-mini", "openai/o1-preview"]:
-                    # For o1 models, save the trimmed history back to the database
-                    new_history = []
-                    if system_content:
-                        new_history.append({"role": "system", "content": system_content})
-                    new_history.extend(trimmed_history_without_system[1:])  # Skip the "Instructions" message
-                    await self.db.save_history(user_id, new_history)
-                else:
-                    await self.db.save_history(user_id, trimmed_history)
-
-                # Inform user about the automatic cleanup
-                await message.channel.send("🔧 **Auto-optimized conversation history** - Removed older messages to fit model limits.")
-
-                # Try the API call again with trimmed history
-                try:
-                    response = await self.client.chat.completions.create(**api_params)
-                    logging.info(f"Successfully processed request after history trimming for model {model}")
-                except Exception as retry_error:
-                    # If it still fails, provide a helpful error message
-                    await message.channel.send(
-                        f"❌ **Request still too large for {model}**\n"
-                        f"Even after optimizing history, the request is too large.\n"
-                        f"Try:\n"
-                        f"• Using a model with higher token limits\n"
-                        f"• Reducing the size of your current message\n"
-                        f"• Using `/clear_history` to start fresh"
-                    )
-                    return
+                await message.channel.send(
+                    f"❌ **Request too large for {model}**\n"
+                    f"Your conversation history or message is too large for this model.\n"
+                    f"Try:\n"
+                    f"• Using `/reset` to start fresh\n"
+                    f"• Using a model with higher token limits\n"
+                    f"• Reducing the size of your current message\n"
+                    f"• Breaking up large files into smaller pieces"
+                )
+                return
             else:
                 # Re-raise other errors
                 raise e
@@ -1007,12 +1013,20 @@ class MessageHandler:
         # If tool calls were processed, make another API call with the updated messages
         if tool_calls_processed and updated_messages:
-            response = await self.client.chat.completions.create(
-                model=model,
-                messages=updated_messages,
-                temperature=0.3 if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else 1,
-                timeout=120
-            )
+            # Prepare API parameters for follow-up call
+            follow_up_params = {
+                "model": model,
+                "messages": updated_messages,
+                "timeout": 240
+            }
+
+            # Add temperature only for models that support it (exclude GPT-5 family)
+            if model in ["openai/gpt-4o", "openai/gpt-4o-mini"]:
+                follow_up_params["temperature"] = 0.3
+            elif model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+                follow_up_params["temperature"] = 1
+
+            response = await self.client.chat.completions.create(**follow_up_params)

             reply = response.choices[0].message.content
@@ -192,7 +192,7 @@ def get_tools_for_model() -> List[Dict[str, Any]]:
                     "input_data": {"type": "string", "description": "Optional input data"},
                     "install_packages": {"type": "array", "items": {"type": "string"}},
                     "enable_visualization": {"type": "boolean", "description": "For charts/graphs"},
-                    "timeout": {"type": "integer", "default": 30, "minimum": 1, "maximum": 120}
+                    "timeout": {"type": "integer", "default": 30, "minimum": 1, "maximum": 240}
                 },
                 "required": ["code"]
             }
@@ -429,15 +429,21 @@ for col in numeric_cols[:3]:  # Limit to first 3 columns
 async def call_openai_api(client, messages, model, temperature=0.7, max_tokens=None, tools=None):
     """Call OpenAI API without retry logic to avoid extra costs."""
     try:
+        # Prepare API parameters
+        api_params = {
+            "model": model,
+            "messages": messages,
+            "max_tokens": max_tokens,
+            "tools": tools,
+            "tool_choice": "auto" if tools else None
+        }
+
+        # Add temperature only for models that support it (exclude GPT-5 family)
+        if model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+            api_params["temperature"] = temperature
+
         # Single API call without retries
-        response = await client.chat.completions.create(
-            model=model,
-            messages=messages,
-            temperature=temperature,
-            max_tokens=max_tokens,
-            tools=tools,
-            tool_choice="auto" if tools else None
-        )
+        response = await client.chat.completions.create(**api_params)
         return response
     except Exception as e:
         logging.error(f"OpenAI API call failed: {str(e)}")
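
For context, a hedged usage sketch of the updated call_openai_api wrapper: the summarize_batch function, the openai_client variable, and the literal values here are illustrative only, and error handling is elided. With a GPT-5 family model the wrapper now silently omits the temperature argument:

# Illustrative usage only (not part of this commit).
async def summarize_batch(openai_client, text):
    # openai_client is assumed to be an async OpenAI-compatible client configured elsewhere.
    response = await call_openai_api(
        client=openai_client,
        messages=[{"role": "user", "content": text}],
        model="openai/gpt-5-mini",   # GPT-5 family: the temperature below is dropped by the wrapper
        temperature=0.2,
        max_tokens=512,
    )
    return response.choices[0].message.content
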
@@ -156,11 +156,16 @@ async def process_pdf_batch(model: str, client, user_prompt: str, batch_content:
     ]

     # Add await here
-    response = await client.chat.completions.create(
-        model=model,
-        messages=messages,
-        temperature=0.1
-    )
+    api_params = {
+        "model": model,
+        "messages": messages
+    }
+
+    # Add temperature only for models that support it (exclude GPT-5 family)
+    if model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+        api_params["temperature"] = 0.1
+
+    response = await client.chat.completions.create(**api_params)

     reply = response.choices[0].message.content