From 61abc9b1e61c1ccb91441886a737f21c657deb45 Mon Sep 17 00:00:00 2001
From: cauvang32
Date: Fri, 8 Aug 2025 11:44:17 +0700
Subject: [PATCH] refactor: streamline API call parameters and enhance token
 management for OpenAI interactions

---
 src/commands/commands.py      |  32 ++++++---
 src/module/message_handler.py | 132 +++++++++++++++++++---------------
 src/utils/openai_utils.py     |  24 ++++---
 src/utils/pdf_utils.py        |  15 ++--
 4 files changed, 119 insertions(+), 84 deletions(-)

diff --git a/src/commands/commands.py b/src/commands/commands.py
index 203a91f..1e4dd1b 100644
--- a/src/commands/commands.py
+++ b/src/commands/commands.py
@@ -156,11 +156,16 @@ def setup_commands(bot: commands.Bot, db_handler, openai_client, image_generator
         ]
 
         # Send to the AI model
-        response = await openai_client.chat.completions.create(
-            model=model if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else "openai/gpt-4o",
-            messages=messages,
-            temperature=0.5
-        )
+        api_params = {
+            "model": model if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else "openai/gpt-4o",
+            "messages": messages
+        }
+
+        # Add temperature only for models that support it (exclude GPT-5 family)
+        if model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+            api_params["temperature"] = 0.5
+
+        response = await openai_client.chat.completions.create(**api_params)
 
         reply = response.choices[0].message.content
 
@@ -221,12 +226,17 @@ def setup_commands(bot: commands.Bot, db_handler, openai_client, image_generator
             {"role": "user", "content": f"Content from {url}:\n{content}"}
         ]
 
-        response = await openai_client.chat.completions.create(
-            model=model if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else "openai/gpt-4o",
-            messages=messages,
-            temperature=0.3,
-            top_p=0.7
-        )
+        api_params = {
+            "model": model if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else "openai/gpt-4o",
+            "messages": messages
+        }
+
+        # Add temperature and top_p only for models that support them (exclude GPT-5 family)
+        if model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+            api_params["temperature"] = 0.3
+            api_params["top_p"] = 0.7
+
+        response = await openai_client.chat.completions.create(**api_params)
 
         reply = response.choices[0].message.content
 
diff --git a/src/module/message_handler.py b/src/module/message_handler.py
index a0ed0e2..845bea3 100644
--- a/src/module/message_handler.py
+++ b/src/module/message_handler.py
@@ -174,7 +174,7 @@ class MessageHandler:
         """Create a reusable aiohttp session for better performance"""
         if self.aiohttp_session is None or self.aiohttp_session.closed:
             self.aiohttp_session = aiohttp.ClientSession(
-                timeout=aiohttp.ClientTimeout(total=120),
+                timeout=aiohttp.ClientTimeout(total=240),
                 connector=aiohttp.TCPConnector(limit=20, ttl_dns_cache=300)
             )
 
@@ -857,6 +857,42 @@ class MessageHandler:
             history.append(current_message)
 
         messages_for_api = prepare_messages_for_api(history)
+        # Proactively trim history to avoid context overload while preserving system prompt
+        current_tokens = self._count_tokens(messages_for_api)
+        token_limit = MODEL_TOKEN_LIMITS.get(model, DEFAULT_TOKEN_LIMIT)
+        max_tokens = int(token_limit * 0.8)  # Use 80% of limit to leave room for response
+
+        if current_tokens > max_tokens:
+            logging.info(f"Proactively trimming history: {current_tokens} tokens > {max_tokens} limit for {model}")
+
+            if model in ["openai/o1-mini", "openai/o1-preview"]:
+                # For o1 models, trim the history without system prompt
+                trimmed_history_without_system = self._trim_history_to_token_limit(history_without_system, model, max_tokens)
+                messages_for_api = prepare_messages_for_api(trimmed_history_without_system)
+
+                # Update the history tracking
+                history_without_system = trimmed_history_without_system
+            else:
+                # For regular models, trim the full history (preserving system prompt)
+                trimmed_history = self._trim_history_to_token_limit(history, model, max_tokens)
+                messages_for_api = prepare_messages_for_api(trimmed_history)
+
+                # Update the history tracking
+                history = trimmed_history
+
+            # Save the trimmed history immediately to keep it in sync
+            if model in ["openai/o1-mini", "openai/o1-preview"]:
+                new_history = []
+                if system_content:
+                    new_history.append({"role": "system", "content": system_content})
+                new_history.extend(history_without_system[1:])  # Skip the "Instructions" message
+                await self.db.save_history(user_id, new_history)
+            else:
+                await self.db.save_history(user_id, history)
+
+            final_tokens = self._count_tokens(messages_for_api)
+            logging.info(f"History trimmed from {current_tokens} to {final_tokens} tokens")
+
         # Determine which models should have tools available
         # openai/o1-mini and openai/o1-preview do not support tools
         use_tools = model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat", "openai/o1", "openai/o3-mini", "openai/gpt-4.1", "openai/gpt-4.1-mini", "openai/gpt-4.1-nano", "openai/o3", "openai/o4-mini"]
 
@@ -865,11 +901,18 @@ class MessageHandler:
         api_params = {
             "model": model,
             "messages": messages_for_api,
-            "temperature": 0.3 if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else 1,
-            "top_p": 0.7 if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else 1,
-            "timeout": 120  # Increased timeout for better response handling
+            "timeout": 240  # Increased timeout for better response handling
         }
 
+        # Add temperature and top_p only for models that support them (exclude GPT-5 family)
+        if model in ["openai/gpt-4o", "openai/gpt-4o-mini"]:
+            api_params["temperature"] = 0.3
+            api_params["top_p"] = 0.7
+        elif model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+            # For other models (not GPT-4o family and not GPT-5 family)
+            api_params["temperature"] = 1
+            api_params["top_p"] = 1
+
         # Add tools if using a supported model
         if use_tools:
             api_params["tools"] = get_tools_for_model()
@@ -879,59 +922,22 @@ class MessageHandler:
         chart_id = None
         image_urls = []  # Will store unique image URLs
 
-        # Make the initial API call without retry logic to avoid extra costs
+        # Make the initial API call
         try:
             response = await self.client.chat.completions.create(**api_params)
         except Exception as e:
-            # Handle 413 Request Entity Too Large error automatically
+            # Handle 413 Request Entity Too Large error with a user-friendly message
             if "413" in str(e) or "tokens_limit_reached" in str(e) or "Request body too large" in str(e):
-                logging.warning(f"Token limit exceeded for model {model}, automatically trimming history...")
-
-                # Trim the history to fit the model's token limit
-                current_tokens = self._count_tokens(messages_for_api)
-                logging.info(f"Current message tokens: {current_tokens}")
-
-                if model in ["openai/o1-mini", "openai/o1-preview"]:
-                    # For o1 models, use the trimmed history without system prompt
-                    trimmed_history_without_system = self._trim_history_to_token_limit(history_without_system, model)
-                    messages_for_api = prepare_messages_for_api(trimmed_history_without_system)
-                else:
-                    # For regular models, trim the full history
-                    trimmed_history = self._trim_history_to_token_limit(history, model)
-                    messages_for_api = prepare_messages_for_api(trimmed_history)
-
-                # Update API parameters with trimmed messages
-                api_params["messages"] = messages_for_api
-
-                # Save the trimmed history to prevent this issue in the future
-                if model in ["openai/o1-mini", "openai/o1-preview"]:
-                    # For o1 models, save the trimmed history back to the database
-                    new_history = []
-                    if system_content:
-                        new_history.append({"role": "system", "content": system_content})
-                    new_history.extend(trimmed_history_without_system[1:])  # Skip the "Instructions" message
-                    await self.db.save_history(user_id, new_history)
-                else:
-                    await self.db.save_history(user_id, trimmed_history)
-
-                # Inform user about the automatic cleanup
-                await message.channel.send("🔧 **Auto-optimized conversation history** - Removed older messages to fit model limits.")
-
-                # Try the API call again with trimmed history
-                try:
-                    response = await self.client.chat.completions.create(**api_params)
-                    logging.info(f"Successfully processed request after history trimming for model {model}")
-                except Exception as retry_error:
-                    # If it still fails, provide a helpful error message
-                    await message.channel.send(
-                        f"❌ **Request still too large for {model}**\n"
-                        f"Even after optimizing history, the request is too large.\n"
-                        f"Try:\n"
-                        f"• Using a model with higher token limits\n"
-                        f"• Reducing the size of your current message\n"
-                        f"• Using `/clear_history` to start fresh"
-                    )
-                    return
+                await message.channel.send(
+                    f"❌ **Request too large for {model}**\n"
+                    f"Your conversation history or message is too large for this model.\n"
+                    f"Try:\n"
+                    f"• Using `/reset` to start fresh\n"
+                    f"• Using a model with higher token limits\n"
+                    f"• Reducing the size of your current message\n"
+                    f"• Breaking up large files into smaller pieces"
+                )
+                return
             else:
                 # Re-raise other errors
                 raise e
@@ -1007,12 +1013,20 @@ class MessageHandler:
 
         # If tool calls were processed, make another API call with the updated messages
         if tool_calls_processed and updated_messages:
-            response = await self.client.chat.completions.create(
-                model=model,
-                messages=updated_messages,
-                temperature=0.3 if model in ["openai/gpt-4o", "openai/gpt-4o-mini", "openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"] else 1,
-                timeout=120
-            )
+            # Prepare API parameters for follow-up call
+            follow_up_params = {
+                "model": model,
+                "messages": updated_messages,
+                "timeout": 240
+            }
+
+            # Add temperature only for models that support it (exclude GPT-5 family)
+            if model in ["openai/gpt-4o", "openai/gpt-4o-mini"]:
+                follow_up_params["temperature"] = 0.3
+            elif model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+                follow_up_params["temperature"] = 1
+
+            response = await self.client.chat.completions.create(**follow_up_params)
 
             reply = response.choices[0].message.content
 
diff --git a/src/utils/openai_utils.py b/src/utils/openai_utils.py
index 8bcfc27..7d193de 100644
--- a/src/utils/openai_utils.py
+++ b/src/utils/openai_utils.py
@@ -192,7 +192,7 @@ def get_tools_for_model() -> List[Dict[str, Any]]:
                     "input_data": {"type": "string", "description": "Optional input data"},
                     "install_packages": {"type": "array", "items": {"type": "string"}},
                     "enable_visualization": {"type": "boolean", "description": "For charts/graphs"},
-                    "timeout": {"type": "integer", "default": 30, "minimum": 1, "maximum": 120}
+                    "timeout": {"type": "integer", "default": 30, "minimum": 1, "maximum": 240}
                 },
                 "required": ["code"]
             }
@@ -429,15 +429,21 @@ for col in numeric_cols[:3]:  # Limit to first 3 columns
 async def call_openai_api(client, messages, model, temperature=0.7, max_tokens=None, tools=None):
     """Call OpenAI API without retry logic to avoid extra costs."""
     try:
+        # Prepare API parameters
+        api_params = {
+            "model": model,
+            "messages": messages,
+            "max_tokens": max_tokens,
+            "tools": tools,
+            "tool_choice": "auto" if tools else None
+        }
+
+        # Add temperature only for models that support it (exclude GPT-5 family)
+        if model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+            api_params["temperature"] = temperature
+
         # Single API call without retries
-        response = await client.chat.completions.create(
-            model=model,
-            messages=messages,
-            temperature=temperature,
-            max_tokens=max_tokens,
-            tools=tools,
-            tool_choice="auto" if tools else None
-        )
+        response = await client.chat.completions.create(**api_params)
         return response
     except Exception as e:
         logging.error(f"OpenAI API call failed: {str(e)}")
 
diff --git a/src/utils/pdf_utils.py b/src/utils/pdf_utils.py
index 1d79f87..4ab1431 100644
--- a/src/utils/pdf_utils.py
+++ b/src/utils/pdf_utils.py
@@ -156,11 +156,16 @@ async def process_pdf_batch(model: str, client, user_prompt: str, batch_content:
         ]
 
         # Add await here
-        response = await client.chat.completions.create(
-            model=model,
-            messages=messages,
-            temperature=0.1
-        )
+        api_params = {
+            "model": model,
+            "messages": messages
+        }
+
+        # Add temperature only for models that support it (exclude GPT-5 family)
+        if model not in ["openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"]:
+            api_params["temperature"] = 0.1
+
+        response = await client.chat.completions.create(**api_params)
 
         reply = response.choices[0].message.content
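
Note: the GPT-5 parameter gating introduced above is duplicated verbatim in all
four files. A minimal sketch of a shared helper that a follow-up commit could
factor the gating into (the GPT5_MODELS constant and build_api_params name are
assumptions, not part of this patch):

    # Hypothetical shared helper, e.g. in src/utils/openai_utils.py
    GPT5_MODELS = {"openai/gpt-5", "openai/gpt-5-nano", "openai/gpt-5-mini", "openai/gpt-5-chat"}

    def build_api_params(model, messages, temperature=None, top_p=None, **extra):
        """Build chat-completion kwargs, dropping sampling knobs for GPT-5 models."""
        params = {"model": model, "messages": messages, **extra}  # extra: timeout, tools, ...
        if model not in GPT5_MODELS:
            if temperature is not None:
                params["temperature"] = temperature
            if top_p is not None:
                params["top_p"] = top_p
        return params

Each call site would then reduce to, e.g.:

    api_params = build_api_params(model, messages, temperature=0.5)
    response = await openai_client.chat.completions.create(**api_params)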