Example usage (install the integration with `pip install llama-index-llms-konko`):

import os

from llama_index.llms.konko import Konko
from llama_index.core.llms import ChatMessage

# Set the Konko API key
os.environ["KONKO_API_KEY"] = "<your-api-key>"

# Set up the Konko LLM with the desired model
llm = Konko(model="meta-llama/llama-2-13b-chat")

# Create a ChatMessage object
message = ChatMessage(role="user", content="Explain Big Bang Theory briefly")

# Call the chat method with the ChatMessage object
response = llm.chat([message])

# Print the response
print(response)
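The same client can also stream the reply token by token through `stream_chat`, which the class below implements on top of the Konko chat endpoint. A minimal sketch, reusing the `llm` and `message` objects from the example above; each yielded `ChatResponse` carries the incremental text in `.delta` and the accumulated reply in `.message.content`:

```python
# Stream the response; `delta` is only the newly generated text for each chunk.
for chunk in llm.stream_chat([message]):
    print(chunk.delta, end="", flush=True)
```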
class Konko(LLM):
    """
    Konko LLM.

    Examples:
        `pip install llama-index-llms-konko`

        ```python
        import os
        from llama_index.llms.konko import Konko
        from llama_index.core.llms import ChatMessage

        # Set the Konko API key
        os.environ["KONKO_API_KEY"] = "<your-api-key>"

        # Set up the Konko LLM with the desired model
        llm = Konko(model="meta-llama/llama-2-13b-chat")

        # Create a ChatMessage object
        message = ChatMessage(role="user", content="Explain Big Bang Theory briefly")

        # Call the chat method with the ChatMessage object
        response = llm.chat([message])

        # Print the response
        print(response)
        ```
    """

    model: str = Field(
        default=DEFAULT_KONKO_MODEL, description="The konko model to use."
    )
    temperature: float = Field(
        default=DEFAULT_TEMPERATURE,
        description="The temperature to use during generation.",
        ge=0.0,
        le=1.0,
    )
    max_tokens: Optional[int] = Field(
        default=DEFAULT_NUM_OUTPUTS,
        description="The maximum number of tokens to generate.",
        gt=0,
    )
    additional_kwargs: Dict[str, Any] = Field(
        default_factory=dict, description="Additional kwargs for the konko API."
    )
    max_retries: int = Field(
        default=10, description="The maximum number of API retries.", ge=0
    )
    konko_api_key: str = Field(default=None, description="The konko API key.")
    openai_api_key: str = Field(default=None, description="The OpenAI API key.")
    api_type: str = Field(default=None, description="The konko API type.")
    model_info_dict: Dict[str, ModelInfo]

    def __init__(
        self,
        model: str = DEFAULT_KONKO_MODEL,
        temperature: float = DEFAULT_TEMPERATURE,
        max_tokens: Optional[int] = DEFAULT_NUM_OUTPUTS,
        additional_kwargs: Optional[Dict[str, Any]] = None,
        max_retries: int = 10,
        konko_api_key: Optional[str] = None,
        openai_api_key: Optional[str] = None,
        api_type: Optional[str] = None,
        api_base: Optional[str] = None,
        api_version: Optional[str] = None,
        callback_manager: Optional[CallbackManager] = None,
        system_prompt: Optional[str] = None,
        messages_to_prompt: Optional[Callable[[Sequence[ChatMessage]], str]] = None,
        completion_to_prompt: Optional[Callable[[str], str]] = None,
        pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT,
        output_parser: Optional[BaseOutputParser] = None,
        model_info_dict: Optional[Dict[str, ModelInfo]] = None,
        **kwargs: Any,
    ) -> None:
        additional_kwargs = additional_kwargs or {}

        (
            konko_api_key,
            openai_api_key,
            api_type,
            api_base,
            api_version,
        ) = resolve_konko_credentials(
            konko_api_key=konko_api_key,
            openai_api_key=openai_api_key,
            api_type=api_type,
            api_base=api_base,
            api_version=api_version,
        )

        super().__init__(
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            additional_kwargs=additional_kwargs,
            max_retries=max_retries,
            callback_manager=callback_manager,
            konko_api_key=konko_api_key,
            openai_api_key=openai_api_key,
            api_type=api_type,
            api_version=api_version,
            api_base=api_base,
            system_prompt=system_prompt,
            messages_to_prompt=messages_to_prompt,
            completion_to_prompt=completion_to_prompt,
            pydantic_program_mode=pydantic_program_mode,
            output_parser=output_parser,
            model_info_dict=self._create_model_info_dict(),
            **kwargs,
        )

    def _get_model_name(self) -> str:
        return self.model

    @classmethod
    def class_name(cls) -> str:
        return "Konko_LLM"

    def _create_model_info_dict(self) -> Dict[str, ModelInfo]:
        models_info_dict = {}
        if is_openai_v1():
            models = konko.models.list().data
            for model in models:
                model_info = ModelInfo(
                    name=model.name,
                    max_context_length=model.max_context_length,
                    is_chat_model=model.is_chat,
                )
                models_info_dict[model.name] = model_info
        else:
            models = konko.Model.list().data
            for model in models:
                model_info = ModelInfo(
                    name=model["name"],
                    max_context_length=model["max_context_length"],
                    is_chat_model=model["is_chat"],
                )
                models_info_dict[model["name"]] = model_info
        return models_info_dict

    def _get_model_info(self) -> ModelInfo:
        model_name = self._get_model_name()
        model_info = self.model_info_dict.get(model_name)
        if model_info is None:
            raise ValueError(
                f"Unknown model: {model_name}. Please provide a valid Konko model name. "
                "Known models are: " + ", ".join(self.model_info_dict.keys())
            )
        return model_info

    def _is_chat_model(self) -> bool:
        """
        Check whether the model configured for this instance is a chat model.

        Returns:
        - bool: True if the model is a chat model, False otherwise.

        Raises:
        - ValueError: If the model is not found in the list of known models.
        """
        model_info = self._get_model_info()
        return model_info.is_chat_model

    @property
    def metadata(self) -> LLMMetadata:
        model_info = self._get_model_info()
        return LLMMetadata(
            context_window=model_info.max_context_length,
            num_output=self.max_tokens,
            is_chat_model=model_info.is_chat_model,
            model_name=self.model,
        )

    @llm_chat_callback()
    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
        if self._is_chat_model():
            chat_fn = self._chat
        else:
            chat_fn = completion_to_chat_decorator(self._complete)
        return chat_fn(messages, **kwargs)

    @llm_chat_callback()
    def stream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponseGen:
        if self._is_chat_model():
            stream_chat_fn = self._stream_chat
        else:
            stream_chat_fn = stream_completion_to_chat_decorator(self._stream_complete)
        return stream_chat_fn(messages, **kwargs)

    @property
    def _credential_kwargs(self) -> Dict[str, Any]:
        return {
            "konko_api_key": self.konko_api_key,
            "api_type": self.api_type,
            "openai_api_key": self.openai_api_key,
        }

    @property
    def _model_kwargs(self) -> Dict[str, Any]:
        base_kwargs = {
            "model": self.model,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
        }
        return {
            **base_kwargs,
            **self.additional_kwargs,
        }

    def _get_all_kwargs(self, **kwargs: Any) -> Dict[str, Any]:
        return {
            **self._model_kwargs,
            **kwargs,
        }

    def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
        if not self._is_chat_model():
            raise ValueError("This model is not a chat model.")
        message_dicts = to_openai_message_dicts(messages)
        all_kwargs = self._get_all_kwargs(**kwargs)
        response = completion_with_retry(
            is_chat_model=self._is_chat_model(),
            max_retries=self.max_retries,
            messages=message_dicts,
            stream=False,
            **all_kwargs,
        )
        if is_openai_v1():
            message_dict = response.choices[0].message
        else:
            message_dict = response["choices"][0]["message"]
        message = from_openai_message_dict(message_dict)

        return ChatResponse(
            message=message,
            raw=response,
            additional_kwargs=self._get_response_token_counts(response),
        )

    def _stream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponseGen:
        if not self._is_chat_model():
            raise ValueError("This model is not a chat model.")
        message_dicts = to_openai_message_dicts(messages)
        all_kwargs = self._get_all_kwargs(**kwargs)

        def gen() -> ChatResponseGen:
            content = ""
            for response in completion_with_retry(
                is_chat_model=self._is_chat_model(),
                max_retries=self.max_retries,
                messages=message_dicts,
                stream=True,
                **all_kwargs,
            ):
                if is_openai_v1():
                    if len(response.choices) == 0 and response.prompt_annotations:
                        continue
                    delta = (
                        response.choices[0].delta if len(response.choices) > 0 else {}
                    )
                    role_value = delta.role
                    content_delta = delta.content or ""
                else:
                    if "choices" not in response or len(response["choices"]) == 0:
                        continue
                    delta = response["choices"][0].get("delta", {})
                    role_value = delta["role"]
                    content_delta = delta["content"] or ""

                role = role_value if role_value is not None else "assistant"
                content += content_delta

                yield ChatResponse(
                    message=ChatMessage(
                        role=role,
                        content=content,
                    ),
                    delta=content_delta,
                    raw=response,
                    additional_kwargs=self._get_response_token_counts(response),
                )

        return gen()

    @llm_completion_callback()
    def complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponse:
        if self._is_chat_model():
            complete_fn = chat_to_completion_decorator(self._chat)
        else:
            complete_fn = self._complete
        return complete_fn(prompt, **kwargs)

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseGen:
        if self._is_chat_model():
            stream_complete_fn = stream_chat_to_completion_decorator(self._stream_chat)
        else:
            stream_complete_fn = self._stream_complete
        return stream_complete_fn(prompt, **kwargs)

    def _get_response_token_counts(self, raw_response: Any) -> dict:
        """Get the token usage reported by the response."""
        if not isinstance(raw_response, dict):
            return {}

        usage = raw_response.get("usage", {})
        # NOTE: other model providers that use the OpenAI client may not report usage
        if usage is None:
            return {}

        return {
            "prompt_tokens": usage.get("prompt_tokens", 0),
            "completion_tokens": usage.get("completion_tokens", 0),
            "total_tokens": usage.get("total_tokens", 0),
        }

    def _complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        if self._is_chat_model():
            raise ValueError("This model is a chat model.")

        all_kwargs = self._get_all_kwargs(**kwargs)
        if self.max_tokens is None:
            # NOTE: non-chat completion endpoint requires max_tokens to be set
            max_tokens = self._get_max_token_for_prompt(prompt)
            all_kwargs["max_tokens"] = max_tokens

        response = completion_with_retry(
            is_chat_model=self._is_chat_model(),
            max_retries=self.max_retries,
            prompt=prompt,
            stream=False,
            **all_kwargs,
        )
        if is_openai_v1():
            text = response.choices[0].text
        else:
            text = response["choices"][0]["text"]
        return CompletionResponse(
            text=text,
            raw=response,
            additional_kwargs=self._get_response_token_counts(response),
        )

    def _stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
        if self._is_chat_model():
            raise ValueError("This model is a chat model.")

        all_kwargs = self._get_all_kwargs(**kwargs)
        if self.max_tokens is None:
            # NOTE: non-chat completion endpoint requires max_tokens to be set
            max_tokens = self._get_max_token_for_prompt(prompt)
            all_kwargs["max_tokens"] = max_tokens

        def gen() -> CompletionResponseGen:
            text = ""
            for response in completion_with_retry(
                is_chat_model=self._is_chat_model(),
                max_retries=self.max_retries,
                prompt=prompt,
                stream=True,
                **all_kwargs,
            ):
                if is_openai_v1():
                    if len(response.choices) > 0:
                        delta = response.choices[0].text
                    else:
                        delta = ""
                else:
                    if len(response["choices"]) > 0:
                        delta = response["choices"][0].text
                    else:
                        delta = ""
                text += delta
                yield CompletionResponse(
                    delta=delta,
                    text=text,
                    raw=response,
                    additional_kwargs=self._get_response_token_counts(response),
                )

        return gen()

    def _get_max_token_for_prompt(self, prompt: str) -> int:
        try:
            import tiktoken
        except ImportError:
            raise ImportError(
                "Please install tiktoken to use the max_tokens=None feature."
            )
        context_window = self.metadata.context_window
        encoding = tiktoken.encoding_for_model(self._get_model_name())
        tokens = encoding.encode(prompt)
        max_token = context_window - len(tokens)
        if max_token <= 0:
            raise ValueError(
                f"The prompt is too long for the model. "
                f"Please use a prompt that is less than {context_window} tokens."
            )
        return max_token

    # ===== Async Endpoints =====

    @llm_chat_callback()
    async def achat(
        self,
        messages: Sequence[ChatMessage],
        **kwargs: Any,
    ) -> ChatResponse:
        achat_fn: Callable[..., Awaitable[ChatResponse]]
        if self._is_chat_model():
            achat_fn = self._achat
        else:
            achat_fn = acompletion_to_chat_decorator(self._acomplete)
        return await achat_fn(messages, **kwargs)

    @llm_chat_callback()
    async def astream_chat(
        self,
        messages: Sequence[ChatMessage],
        **kwargs: Any,
    ) -> ChatResponseAsyncGen:
        astream_chat_fn: Callable[..., Awaitable[ChatResponseAsyncGen]]
        if self._is_chat_model():
            astream_chat_fn = self._astream_chat
        else:
            astream_chat_fn = astream_completion_to_chat_decorator(
                self._astream_complete
            )
        return await astream_chat_fn(messages, **kwargs)

    @llm_completion_callback()
    async def acomplete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponse:
        if self._is_chat_model():
            acomplete_fn = achat_to_completion_decorator(self._achat)
        else:
            acomplete_fn = self._acomplete
        return await acomplete_fn(prompt, **kwargs)

    @llm_completion_callback()
    async def astream_complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseAsyncGen:
        if self._is_chat_model():
            astream_complete_fn = astream_chat_to_completion_decorator(
                self._astream_chat
            )
        else:
            astream_complete_fn = self._astream_complete
        return await astream_complete_fn(prompt, **kwargs)

    async def _achat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponse:
        if not self._is_chat_model():
            raise ValueError("This model is not a chat model.")
        message_dicts = to_openai_message_dicts(messages)
        all_kwargs = self._get_all_kwargs(**kwargs)
        response = await acompletion_with_retry(
            is_chat_model=self._is_chat_model(),
            max_retries=self.max_retries,
            messages=message_dicts,
            stream=False,
            **all_kwargs,
        )
        if is_openai_v1():
            message_dict = response.choices[0].message
        else:
            message_dict = response["choices"][0]["message"]
        message = from_openai_message_dict(message_dict)

        return ChatResponse(
            message=message,
            raw=response,
            additional_kwargs=self._get_response_token_counts(response),
        )

    async def _astream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponseAsyncGen:
        if not self._is_chat_model():
            raise ValueError("This model is not a chat model.")
        message_dicts = to_openai_message_dicts(messages)
        all_kwargs = self._get_all_kwargs(**kwargs)

        async def gen() -> ChatResponseAsyncGen:
            content = ""
            _function_call: Optional[dict] = None
            async for response in await acompletion_with_retry(
                is_chat_model=self._is_chat_model(),
                max_retries=self.max_retries,
                messages=message_dicts,
                stream=True,
                **all_kwargs,
            ):
                if is_openai_v1():
                    if len(response.choices) > 0:
                        delta = response.choices[0].delta
                    else:
                        delta = {}
                    role = delta.role
                    content_delta = delta.content or ""
                else:
                    if len(response["choices"]) > 0:
                        delta = response["choices"][0].delta
                    else:
                        delta = {}
                    role = delta["role"]
                    content_delta = delta["content"] or ""
                content += content_delta

                yield ChatResponse(
                    message=ChatMessage(
                        role=role,
                        content=content,
                    ),
                    delta=content_delta,
                    raw=response,
                    additional_kwargs=self._get_response_token_counts(response),
                )

        return gen()

    async def _acomplete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        if self._is_chat_model():
            raise ValueError("This model is a chat model.")

        all_kwargs = self._get_all_kwargs(**kwargs)
        if self.max_tokens is None:
            # NOTE: non-chat completion endpoint requires max_tokens to be set
            max_tokens = self._get_max_token_for_prompt(prompt)
            all_kwargs["max_tokens"] = max_tokens

        response = await acompletion_with_retry(
            is_chat_model=self._is_chat_model(),
            max_retries=self.max_retries,
            prompt=prompt,
            stream=False,
            **all_kwargs,
        )
        if is_openai_v1():
            text = response.choices[0].text
        else:
            text = response["choices"][0]["text"]
        return CompletionResponse(
            text=text,
            raw=response,
            additional_kwargs=self._get_response_token_counts(response),
        )

    async def _astream_complete(
        self, prompt: str, **kwargs: Any
    ) -> CompletionResponseAsyncGen:
        if self._is_chat_model():
            raise ValueError("This model is a chat model.")

        all_kwargs = self._get_all_kwargs(**kwargs)
        if self.max_tokens is None:
            # NOTE: non-chat completion endpoint requires max_tokens to be set
            max_tokens = self._get_max_token_for_prompt(prompt)
            all_kwargs["max_tokens"] = max_tokens

        async def gen() -> CompletionResponseAsyncGen:
            text = ""
            async for response in await acompletion_with_retry(
                is_chat_model=self._is_chat_model(),
                max_retries=self.max_retries,
                prompt=prompt,
                stream=True,
                **all_kwargs,
            ):
                if is_openai_v1():
                    if len(response.choices) > 0:
                        delta = response.choices[0].text
                    else:
                        delta = ""
                else:
                    if len(response["choices"]) > 0:
                        delta = response["choices"][0].text
                    else:
                        delta = ""
                text += delta
                yield CompletionResponse(
                    delta=delta,
                    text=text,
                    raw=response,
                    additional_kwargs=self._get_response_token_counts(response),
                )

        return gen()
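The class also exposes async endpoints (`achat`, `astream_chat`, `acomplete`, `astream_complete`). A minimal sketch of the async chat path, assuming the same illustrative model name and API-key setup as the example at the top; note that `astream_chat` is awaited first to obtain the async generator of `ChatResponse` chunks:

```python
import asyncio
import os

from llama_index.core.llms import ChatMessage
from llama_index.llms.konko import Konko

os.environ["KONKO_API_KEY"] = "<your-api-key>"


async def main() -> None:
    llm = Konko(model="meta-llama/llama-2-13b-chat")
    message = ChatMessage(role="user", content="Explain Big Bang Theory briefly")

    # Non-streaming async chat
    response = await llm.achat([message])
    print(response)

    # Streaming async chat: each chunk carries the incremental text in `.delta`
    stream = await llm.astream_chat([message])
    async for chunk in stream:
        print(chunk.delta, end="", flush=True)


asyncio.run(main())
```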