from __future__ import annotations

import json
from io import StringIO
from typing import Any, Dict, Iterator, List, Optional

import requests
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from langchain_core.utils import get_pydantic_field_names
from pydantic import ConfigDict


class Llamafile(LLM):
    """Llamafile lets you distribute and run large language models with a
    single file.

    To get started, see: https://github.com/Mozilla-Ocho/llamafile

    To use this class, you will need to first:

    1. Download a llamafile.
    2. Make the downloaded file executable: `chmod +x path/to/model.llamafile`
    3. Start the llamafile in server mode:

        `./path/to/model.llamafile --server --nobrowser`

    Example:
        .. code-block:: python

            from langchain_community.llms import Llamafile
            llm = Llamafile()
            llm.invoke("Tell me a joke.")
    """

    base_url: str = "http://localhost:8080"
    """Base url where the llamafile server is listening."""

    request_timeout: Optional[int] = None
    """Timeout for server requests, in seconds."""

    streaming: bool = False
    """Receive each predicted token in real time instead of waiting for the
    completion to finish. To enable this, set to True."""

    # Generation options forwarded to the llamafile /completion endpoint.

    seed: int = -1
    """Random Number Generator (RNG) seed. A value of -1 uses a random seed."""

    temperature: float = 0.8
    """Adjust the randomness of the generated text."""

    top_k: int = 40
    """Limit the next token selection to the K most probable tokens."""

    top_p: float = 0.95
    """Limit the next token selection to a subset of tokens with a cumulative
    probability above a threshold P."""

    min_p: float = 0.05
    """Minimum probability for a token to be considered, relative to the
    probability of the most likely token."""

    n_predict: int = -1
    """Maximum number of tokens to predict when generating text.
    A value of -1 means no limit."""

    n_keep: int = 0
    """Number of tokens from the prompt to retain when the context size is
    exceeded."""

    tfs_z: float = 1.0
    """Tail free sampling parameter z. A value of 1.0 disables it."""

    typical_p: float = 1.0
    """Locally typical sampling parameter p. A value of 1.0 disables it."""

    repeat_penalty: float = 1.1
    """Control the repetition of token sequences in the generated text."""

    repeat_last_n: int = 64
    """Last n tokens to consider for penalizing repetition."""

    penalize_nl: bool = True
    """Penalize newline tokens when applying the repeat penalty."""

    presence_penalty: float = 0.0
    """Repeat alpha presence penalty. A value of 0.0 disables it."""

    frequency_penalty: float = 0.0
    """Repeat alpha frequency penalty. A value of 0.0 disables it."""

    mirostat: int = 0
    """Enable Mirostat sampling. 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0."""

    mirostat_tau: float = 5.0
    """Mirostat target entropy, parameter tau."""

    mirostat_eta: float = 0.1
    """Mirostat learning rate, parameter eta."""

    model_config = ConfigDict(
        extra="forbid",
    )

    @property
    def _llm_type(self) -> str:
        return "llamafile"

    @property
    def _param_fieldnames(self) -> List[str]:
        # Return the field names that should be sent to the llamafile server
        # as generation parameters, i.e. everything except client-side options.
        ignore_keys = [
            "base_url",
            "cache",
            "callback_manager",
            "callbacks",
            "metadata",
            "name",
            "request_timeout",
            "streaming",
            "tags",
            "verbose",
            "custom_get_token_ids",
        ]
        attrs = [
            k for k in get_pydantic_field_names(self.__class__) if k not in ignore_keys
        ]
        return attrs

    @property
    def _default_params(self) -> Dict[str, Any]:
        params = {}
        for fieldname in self._param_fieldnames:
            params[fieldname] = getattr(self, fieldname)
        return params

    def _get_parameters(
        self, stop: Optional[List[str]] = None, **kwargs: Any
    ) -> Dict[str, Any]:
        params = self._default_params

        # Only update keys that are already present in params. This way, we
        # don't accidentally post unknown/unhandled key-values in the request
        # to the llamafile server.
        for k, v in kwargs.items():
            if k in params:
                params[k] = v

        if stop is not None and len(stop) > 0:
            params["stop"] = stop

        if self.streaming:
            params["stream"] = True

        return params

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Request prompt completion from the llamafile server and return the
        output.

        Args:
            prompt: The prompt to use for generation.
            stop: A list of strings to stop generation when encountered.
            run_manager:
            **kwargs: Any additional options to pass as part of the
            generation request.

        Returns:
            The string generated by the model.
        """
        if self.streaming:
            # Collect the streamed chunks into a single string.
            with StringIO() as buff:
                for chunk in self._stream(
                    prompt, stop=stop, run_manager=run_manager, **kwargs
                ):
                    buff.write(chunk.text)

                text = buff.getvalue()

            return text
        else:
            params = self._get_parameters(stop=stop, **kwargs)
            payload = {"prompt": prompt, **params}

            try:
                response = requests.post(
                    url=f"{self.base_url}/completion",
                    headers={"Content-Type": "application/json"},
                    json=payload,
                    stream=False,
                    timeout=self.request_timeout,
                )
            except requests.exceptions.ConnectionError:
                raise requests.exceptions.ConnectionError(
                    "Could not connect to Llamafile server. Please make sure "
                    f"that a server is running at {self.base_url}."
                )

            response.raise_for_status()
            response.encoding = "utf-8"

            text = response.json()["content"]

            return text

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Yields results objects as they are generated in real time.

        It also calls the callback manager's on_llm_new_token event with
        similar parameters to the OpenAI LLM class method of the same name.

        Args:
            prompt: The prompts to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager:
            **kwargs: Any additional options to pass as part of the
            generation request.

        Returns:
            A generator representing the stream of tokens being generated.

        Yields:
            Dictionary-like objects each containing a token

        Example:
        .. code-block:: python

            from langchain_community.llms import Llamafile
            llm = Llamafile(
                temperature = 0.0
            )
            for chunk in llm.stream("Ask 'Hi, how are you?' like a pirate:'",
                    stop=["'","\n"]):
                result = chunk["choices"][0]
                print(result["text"], end='', flush=True)
        """
        params = self._get_parameters(stop=stop, **kwargs)
        if "stream" not in params:
            params["stream"] = True

        payload = {"prompt": prompt, **params}

        try:
            response = requests.post(
                url=f"{self.base_url}/completion",
                headers={"Content-Type": "application/json"},
                json=payload,
                stream=True,
                timeout=self.request_timeout,
            )
        except requests.exceptions.ConnectionError:
            raise requests.exceptions.ConnectionError(
                "Could not connect to Llamafile server. Please make sure "
                f"that a server is running at {self.base_url}."
            )

        response.encoding = "utf8"

        for raw_chunk in response.iter_lines(decode_unicode=True):
            content = self._get_chunk_content(raw_chunk)
            chunk = GenerationChunk(text=content)

            if run_manager:
                run_manager.on_llm_new_token(token=chunk.text)
            yield chunk

    def _get_chunk_content(self, chunk: str) -> str:
        """When streaming is turned on, llamafile server returns lines like:

        'data: {"content":" They","multimodal":true,"slot_id":0,"stop":false}'

        Here, we convert this to a dict and return the value of the 'content'
        field.
        """
        if chunk.startswith("data:"):
            # Drop the leading "data: " server-sent-event prefix, then parse
            # the remaining JSON object.
            cleaned = chunk.lstrip("data: ")
            data = json.loads(cleaned)
            return data["content"]
        else:
            return chunk