refactor: streamline API call parameters and enhance token management for OpenAI interactions

2025-08-08 11:44:17 +07:00
parent 59b5e13005
commit 61abc9b1e6
4 changed files with 119 additions and 84 deletions

View File

@@ -156,11 +156,16 @@ def setup_commands(bot: commands.Bot, db_handler, openai_client, image_generator
 ]
 # Send to the AI model
-response = await openai_client.chat.completions.create(
-    model=model if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else "openai/gpt-4o",
-    messages=messages,
-    temperature=0.5
-)
+api_params = {
+    "model": model if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else "openai/gpt-4o",
+    "messages": messages
+}
+# Add temperature only for models that support it (exclude GPT-5 family)
+if model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+    api_params["temperature"] = 0.5
+response = await openai_client.chat.completions.create(**api_params)
 reply = response.choices[0].message.content
@@ -221,12 +226,17 @@ def setup_commands(bot: commands.Bot, db_handler, openai_client, image_generator
{"role": "user", "content": f"Content from {url}:\n{content}"}
]
response = await openai_client.chat.completions.create(
model=model if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else "openai/gpt-4o",
messages=messages,
temperature=0.3,
top_p=0.7
)
api_params = {
"model": model if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else "openai/gpt-4o",
"messages": messages
}
# Add temperature and top_p only for models that support them (exclude GPT-5 family)
if model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
api_params["temperature"] = 0.3
api_params["top_p"] = 0.7
response = await openai_client.chat.completions.create(**api_params)
reply = response.choices[0].message.content
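
Note: the same guard (drop sampling parameters for the GPT-5 family) is repeated at every call site in this commit. A minimal sketch of how that rule could be centralized; the helper name and placement are hypothetical and not part of this change:

    # Hypothetical helper, not part of this commit: one place for the
    # "GPT-5 models do not accept temperature/top_p" rule applied above.
    GPT5_FAMILY = {"openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"}

    def build_chat_params(model, messages, temperature=None, top_p=None, **extra):
        """Build kwargs for chat.completions.create, omitting sampling params for GPT-5 models."""
        params = {"model": model, "messages": messages, **extra}
        if model not in GPT5_FAMILY:
            if temperature is not None:
                params["temperature"] = temperature
            if top_p is not None:
                params["top_p"] = top_p
        return params

    # Usage mirroring the call above:
    # api_params = build_chat_params(model, messages, temperature=0.3, top_p=0.7)
    # response = await openai_client.chat.completions.create(**api_params)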

View File

@@ -174,7 +174,7 @@ class MessageHandler:
"""Create a reusable aiohttp session for better performance"""
if self.aiohttp_session is None or self.aiohttp_session.closed:
self.aiohttp_session = aiohttp.ClientSession(
timeout=aiohttp.ClientTimeout(total=120),
timeout=aiohttp.ClientTimeout(total=240),
connector=aiohttp.TCPConnector(limit=20, ttl_dns_cache=300)
)
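
A shared ClientSession like this also has to be closed on shutdown; the diff does not show how the session is released, so the following teardown helper is an assumption built on the aiohttp_session attribute used above:

    # Hypothetical teardown helper, not shown in this diff.
    async def close_handler_session(handler):
        """Close the handler's reusable aiohttp session on shutdown to avoid leaked connections."""
        if handler.aiohttp_session is not None and not handler.aiohttp_session.closed:
            await handler.aiohttp_session.close()
            handler.aiohttp_session = None
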
@@ -857,6 +857,42 @@ class MessageHandler:
 history.append(current_message)
 messages_for_api = prepare_messages_for_api(history)
+# Proactively trim history to avoid context overload while preserving system prompt
+current_tokens = self._count_tokens(messages_for_api)
+token_limit = MODEL_TOKEN_LIMITS.get(model, DEFAULT_TOKEN_LIMIT)
+max_tokens = int(token_limit * 0.8) # Use 80% of limit to leave room for response
+if current_tokens > max_tokens:
+    logging.info(f"Proactively trimming history: {current_tokens} tokens > {max_tokens} limit for {model}")
+    if model in ["openai/o1-mini", "openai/o1-preview"]:
+        # For o1 models, trim the history without system prompt
+        trimmed_history_without_system = self._trim_history_to_token_limit(history_without_system, model, max_tokens)
+        messages_for_api = prepare_messages_for_api(trimmed_history_without_system)
+        # Update the history tracking
+        history_without_system = trimmed_history_without_system
+    else:
+        # For regular models, trim the full history (preserving system prompt)
+        trimmed_history = self._trim_history_to_token_limit(history, model, max_tokens)
+        messages_for_api = prepare_messages_for_api(trimmed_history)
+        # Update the history tracking
+        history = trimmed_history
+    # Save the trimmed history immediately to keep it in sync
+    if model in ["openai/o1-mini", "openai/o1-preview"]:
+        new_history = []
+        if system_content:
+            new_history.append({"role": "system", "content": system_content})
+        new_history.extend(history_without_system[1:]) # Skip the "Instructions" message
+        await self.db.save_history(user_id, new_history)
+    else:
+        await self.db.save_history(user_id, history)
+    final_tokens = self._count_tokens(messages_for_api)
+    logging.info(f"History trimmed from {current_tokens} to {final_tokens} tokens")
 # Determine which models should have tools available
 # openai/o1-mini and openai/o1-preview do not support tools
 use_tools = model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat", "openai/o1", "openai/o3-mini", "openai/gpt-4.1", "openai/gpt-4.1-mini", "openai/gpt-4.1-nano", "openai/o3", "openai/o4-mini"]
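
The trimming above relies on self._count_tokens and self._trim_history_to_token_limit, whose implementations are not part of this diff. A rough sketch of what message token counting can look like, assuming tiktoken and an approximate per-message overhead:

    # Sketch only; the real helpers are defined elsewhere in MessageHandler.
    import tiktoken

    def count_message_tokens(messages, model_name="gpt-4o"):
        """Approximate the token count of a list of chat messages."""
        try:
            encoding = tiktoken.encoding_for_model(model_name)
        except KeyError:
            encoding = tiktoken.get_encoding("cl100k_base")  # reasonable fallback
        total = 0
        for message in messages:
            total += 4  # rough per-message overhead for role and separators
            content = message.get("content") or ""
            if isinstance(content, str):
                total += len(encoding.encode(content))
        return total
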
@@ -865,11 +901,18 @@ class MessageHandler:
 api_params = {
     "model": model,
     "messages": messages_for_api,
-    "temperature": 0.3 if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else 1,
-    "top_p": 0.7 if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else 1,
-    "timeout": 120 # Increased timeout for better response handling
+    "timeout": 240 # Increased timeout for better response handling
 }
+# Add temperature and top_p only for models that support them (exclude GPT-5 family)
+if model in ["openai/gpt-4o", "openai/gpt-4o-mini"]:
+    api_params["temperature"] = 0.3
+    api_params["top_p"] = 0.7
+elif model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+    # For other models (not GPT-4o family and not GPT-5 family)
+    api_params["temperature"] = 1
+    api_params["top_p"] = 1
 # Add tools if using a supported model
 if use_tools:
     api_params["tools"] = get_tools_for_model()
@@ -879,59 +922,22 @@ class MessageHandler:
 chart_id = None
 image_urls = [] # Will store unique image URLs
-# Make the initial API call without retry logic to avoid extra costs
+# Make the initial API call
 try:
     response = await self.client.chat.completions.create(**api_params)
 except Exception as e:
-    # Handle 413 Request Entity Too Large error automatically
+    # Handle 413 Request Entity Too Large error with a user-friendly message
     if "413" in str(e) or "tokens_limit_reached" in str(e) or "Request body too large" in str(e):
-        logging.warning(f"Token limit exceeded for model {model}, automatically trimming history...")
-        # Trim the history to fit the model's token limit
-        current_tokens = self._count_tokens(messages_for_api)
-        logging.info(f"Current message tokens: {current_tokens}")
-        if model in ["openai/o1-mini", "openai/o1-preview"]:
-            # For o1 models, use the trimmed history without system prompt
-            trimmed_history_without_system = self._trim_history_to_token_limit(history_without_system, model)
-            messages_for_api = prepare_messages_for_api(trimmed_history_without_system)
-        else:
-            # For regular models, trim the full history
-            trimmed_history = self._trim_history_to_token_limit(history, model)
-            messages_for_api = prepare_messages_for_api(trimmed_history)
-        # Update API parameters with trimmed messages
-        api_params["messages"] = messages_for_api
-        # Save the trimmed history to prevent this issue in the future
-        if model in ["openai/o1-mini", "openai/o1-preview"]:
-            # For o1 models, save the trimmed history back to the database
-            new_history = []
-            if system_content:
-                new_history.append({"role": "system", "content": system_content})
-            new_history.extend(trimmed_history_without_system[1:]) # Skip the "Instructions" message
-            await self.db.save_history(user_id, new_history)
-        else:
-            await self.db.save_history(user_id, trimmed_history)
-        # Inform user about the automatic cleanup
-        await message.channel.send("🔧 **Auto-optimized conversation history** - Removed older messages to fit model limits.")
-        # Try the API call again with trimmed history
-        try:
-            response = await self.client.chat.completions.create(**api_params)
-            logging.info(f"Successfully processed request after history trimming for model {model}")
-        except Exception as retry_error:
-            # If it still fails, provide a helpful error message
-            await message.channel.send(
-                f"❌ **Request still too large for {model}**\n"
-                f"Even after optimizing history, the request is too large.\n"
-                f"Try:\n"
-                f"• Using a model with higher token limits\n"
-                f"• Reducing the size of your current message\n"
-                f"• Using `/clear_history` to start fresh"
-            )
-            return
+        await message.channel.send(
+            f"❌ **Request too large for {model}**\n"
+            f"Your conversation history or message is too large for this model.\n"
+            f"Try:\n"
+            f"• Using `/reset` to start fresh\n"
+            f"• Using a model with higher token limits\n"
+            f"• Reducing the size of your current message\n"
+            f"• Breaking up large files into smaller pieces"
+        )
+        return
     else:
         # Re-raise other errors
         raise e
@@ -1007,12 +1013,20 @@ class MessageHandler:
 # If tool calls were processed, make another API call with the updated messages
 if tool_calls_processed and updated_messages:
-    response = await self.client.chat.completions.create(
-        model=model,
-        messages=updated_messages,
-        temperature=0.3 if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else 1,
-        timeout=120
-    )
+    # Prepare API parameters for follow-up call
+    follow_up_params = {
+        "model": model,
+        "messages": updated_messages,
+        "timeout": 240
+    }
+    # Add temperature only for models that support it (exclude GPT-5 family)
+    if model in ["openai/gpt-4o", "openai/gpt-4o-mini"]:
+        follow_up_params["temperature"] = 0.3
+    elif model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+        follow_up_params["temperature"] = 1
+    response = await self.client.chat.completions.create(**follow_up_params)
 reply = response.choices[0].message.content
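
The follow-up call above assumes updated_messages already contains the assistant's tool calls and their results; that assembly happens outside this hunk. A sketch of the usual round trip, assuming OpenAI Python SDK v1 message objects (run_tool is a hypothetical dispatcher, not a name from this repo):

    import json

    async def append_tool_results(messages, assistant_message, run_tool):
        """Append the assistant's tool calls and their outputs so the model can continue."""
        messages.append({
            "role": "assistant",
            "content": assistant_message.content,
            "tool_calls": [tc.model_dump() for tc in assistant_message.tool_calls],
        })
        for tool_call in assistant_message.tool_calls:
            result = await run_tool(tool_call.function.name,
                                    json.loads(tool_call.function.arguments))
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(result),
            })
        return messages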

View File

@@ -192,7 +192,7 @@ def get_tools_for_model() -> List[Dict[str, Any]]:
"input_data": {"type": "string", "description": "Optional input data"},
"install_packages": {"type": "array", "items": {"type": "string"}},
"enable_visualization": {"type": "boolean", "description": "For charts/graphs"},
"timeout": {"type": "integer", "default": 30, "minimum": 1, "maximum": 120}
"timeout": {"type": "integer", "default": 30, "minimum": 1, "maximum": 240}
},
"required": ["code"]
}
@@ -429,15 +429,21 @@ for col in numeric_cols[:3]: # Limit to first 3 columns
 async def call_openai_api(client, messages, model, temperature=0.7, max_tokens=None, tools=None):
     """Call OpenAI API without retry logic to avoid extra costs."""
     try:
+        # Prepare API parameters
+        api_params = {
+            "model": model,
+            "messages": messages,
+            "max_tokens": max_tokens,
+            "tools": tools,
+            "tool_choice": "auto" if tools else None
+        }
+        # Add temperature only for models that support it (exclude GPT-5 family)
+        if model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+            api_params["temperature"] = temperature
         # Single API call without retries
-        response = await client.chat.completions.create(
-            model=model,
-            messages=messages,
-            temperature=temperature,
-            max_tokens=max_tokens,
-            tools=tools,
-            tool_choice="auto" if tools else None
-        )
+        response = await client.chat.completions.create(**api_params)
         return response
     except Exception as e:
         logging.error(f"OpenAI API call failed: {str(e)}")
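
For reference, a small usage sketch of call_openai_api as defined above. The client construction is a placeholder: the openai/-prefixed model names in this repo imply an OpenAI-compatible gateway, so the base_url and api_key here are assumptions, not values from the source:

    import asyncio
    from openai import AsyncOpenAI

    async def demo():
        # Placeholder endpoint and key; point this at whatever gateway the bot actually uses.
        client = AsyncOpenAI(base_url="https://example-gateway/v1", api_key="YOUR_KEY")
        messages = [{"role": "user", "content": "Say hello in one short sentence."}]
        response = await call_openai_api(client, messages, model="openai/gpt-4o-mini")
        if response is not None:  # the except branch above may not return a response
            print(response.choices[0].message.content)

    asyncio.run(demo())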

View File

@@ -156,11 +156,16 @@ async def process_pdf_batch(model: str, client, user_prompt: str, batch_content:
 ]
 # Add await here
-response = await client.chat.completions.create(
-    model=model,
-    messages=messages,
-    temperature=0.1
-)
+api_params = {
+    "model": model,
+    "messages": messages
+}
+# Add temperature only for models that support it (exclude GPT-5 family)
+if model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+    api_params["temperature"] = 0.1
+response = await client.chat.completions.create(**api_params)
 reply = response.choices[0].message.content