o
    Ag\                     @  sn   d dl mZ d dlZd dlmZmZmZmZmZ d dl	m
Z
mZ G dd deZdddZG dd deZdS )    )annotationsN)AnyListLiteralOptionalUnion)LanguageTextSplitterc                      s0   e Zd ZdZ	dd fddZdddZ  ZS )CharacterTextSplitterz(Splitting text that looks at characters.

F	separatorstris_separator_regexboolkwargsr   returnNonec                   s"   t  jdi | || _|| _dS )Create a new TextSplitter.N )super__init__
_separator_is_separator_regex)selfr   r   r   	__class__r   i/var/www/html/development/chatbot/venv/lib/python3.10/site-packages/langchain_text_splitters/character.pyr      s   
zCharacterTextSplitter.__init__text	List[str]c                 C  sB   | j r| jnt| j}t||| j}| jrdn| j}| ||S )&Split incoming text and return chunks. )r   r   reescape_split_text_with_regex_keep_separator_merge_splits)r   r   r   splitsr   r   r   r   
split_text   s
   z CharacterTextSplitter.split_text)r   F)r   r   r   r   r   r   r   r   r   r   r   r   )__name__
__module____qualname____doc__r   r'   __classcell__r   r   r   r   r
   	   s
    r
   r   r   r   keep_separator$Union[bool, Literal['start', 'end']]r   r   c                   s   |r\|rUt d| d|  |dkr# fddtdt d dD n fd	dtdt dD }t d dkrB| d
d  7 }|dkrM| d
 g n d g| }nt || }nt| }dd |D S )N()endc                       g | ]} |  |d    qS    r   .0i_splitsr   r   
<listcomp>(        z*_split_text_with_regex.<locals>.<listcomp>r   r5      c                   r3   r4   r   r6   r9   r   r   r;   *   r<   c                 S  s   g | ]}|d kr|qS )r    r   )r7   sr   r   r   r;   7   s    )r!   splitrangelenlist)r   r   r.   r&   r   r9   r   r#      s    "r#   c                      sZ   e Zd ZdZ			dd fddZd ddZd!ddZed"ddZe	d#ddZ
  ZS )$RecursiveCharacterTextSplitterzSplitting text by recursively look at characters.

    Recursively tries to split by different characters to find one
    that works.
    NTF
separatorsOptional[List[str]]r.   r/   r   r   r   r   r   r   c                   s.   t  jdd|i| |pg d| _|| _dS )r   r.   )r   
 r    Nr   )r   r   _separatorsr   )r   rE   r.   r   r   r   r   r   r   A   s   
z'RecursiveCharacterTextSplitter.__init__r   r   r   c                 C  s&  g }|d }g }t |D ](\}}| jr|nt|}|dkr"|} nt||r4|}||d d } nq| jr:|nt|}t||| j}	g }
| jrMdn|}|	D ]2}| || jk ra|
	| qQ|
rp| 
|
|}|| g }
|sx|	| qQ| ||}|| qQ|
r| 
|
|}|| |S )r   r>   r    r5   N)	enumerater   r!   r"   searchr#   r$   _length_function_chunk_sizeappendr%   extend_split_text)r   r   rE   final_chunksr   new_separatorsr8   _sr   r&   _good_splitsr?   merged_text
other_infor   r   r   rP   M   s@   

z*RecursiveCharacterTextSplitter._split_textc                 C  s   |  || jS )zSplit the input text into smaller chunks based on predefined separators.

        Args:
            text (str): The input text to be split.

        Returns:
            List[str]: A list of text chunks obtained after splitting.
        )rP   rI   )r   r   r   r   r   r'   u   s   	z)RecursiveCharacterTextSplitter.split_textlanguager   c                 K  s   |  |}| d|dd|S )a  Return an instance of this class based on a specific language.

        This method initializes the text splitter with language-specific separators.

        Args:
            language (Language): The language to configure the text splitter for.
            **kwargs (Any): Additional keyword arguments to customize the splitter.

        Returns:
            RecursiveCharacterTextSplitter: An instance of the text splitter configured
            for the specified language.
        T)rE   r   Nr   )get_separators_for_language)clsrW   r   rE   r   r   r   from_language   s   
z,RecursiveCharacterTextSplitter.from_languagec                 C  s  | t jks
| t jkrg dS | t jkrg dS | t jkr g dS | t jkr)g dS | t jkr2g dS | t jkr;g dS | t jkrDg dS | t j	krMg dS | t j
krVg d	S | t jkr_g d
S | t jkrhg dS | t jkrqg dS | t jkrzg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jkrg dS | t jv rtd|  dtd|  dtt  )a
  Retrieve a list of separators specific to the given language.

        Args:
            language (Language): The language for which to get the separators.

        Returns:
            List[str]: A list of separators appropriate for the specified language.
        )
class z
void z
int z
float z
double 
if 
for 
while 
switch 
case r   rG   rH   r    )
func 
var 
const 
type r\   r]   r_   r`   r   rG   rH   r    )r[   
public 
protected 	
private 
static r\   r]   r^   r_   r`   r   rG   rH   r    )r[   re   rf   rg   z

internal z
companion z
fun 
val rb   r\   r]   r^   z
when r`   
else r   rG   rH   r    )

function rc   
let rb   r[   r\   r]   r^   r_   r`   	
default r   rG   rH   r    )
enum 
interface z
namespace rd   r[   rk   rc   rl   rb   r\   r]   r^   r_   r`   rm   r   rG   rH   r    )rk   r[   r\   	
foreach r^   
do r_   r`   r   rG   rH   r    )
z	
message z	
service rn   z
option 
import z
syntax r   rG   rH   r    )r[   
def z
	def r   rG   rH   r    )z
=+
z
-+
z
\*+
z

.. *

r   rG   rH   r    )rs   r[   r\   
unless r^   r]   rq   z
begin z
rescue r   rG   rH   r    )rs   z
defp z
defmodule z
defprotocol z

defmacro z
defmacrop r\   rt   r^   r`   z
cond z
with r]   rq   r   rG   rH   r    )z
fn rc   rl   r\   r^   r]   z
loop 
match rc   r   rG   rH   r    )r[   z
object rs   ri   rb   r\   r]   r^   ru   r`   r   rG   rH   r    )ra   r[   
struct rn   r\   r]   r^   rq   r_   r`   r   rG   rH   r    )	z
#{1,6} z```
z	
\*\*\*+
z
---+
z
___+
r   rG   rH   r    )z
\\chapter{z
\\section{z
\\subsection{z
\\subsubsection{z
\\begin{enumerate}z
\\begin{itemize}z
\\begin{description}z
\\begin{list}z
\\begin{quote}z
\\begin{quotation}z
\\begin{verse}z
\\begin{verbatim}z
\\begin{align}z$$$rH   r    )z<bodyz<divz<pz<brz<liz<h1z<h2z<h3z<h4z<h5z<h6z<spanz<tablez<trz<tdz<thz<ulz<olz<headerz<footerz<navz<headz<stylez<scriptz<metaz<titler    )ro   rn   z
implements z

delegate 
event r[   z

abstract re   rf   rg   rh   z
return r\   z

continue r]   rp   r^   r_   z
break r`   rj   
try z
throw 	
finally 
catch r   rG   rH   r    )z
pragma z
using z

contract ro   z	
library z
constructor rd   rk   rx   z

modifier z
error rv   rn   r\   r]   r^   z

do while z

assembly r   rG   rH   r    )z
IDENTIFICATION DIVISION.z
ENVIRONMENT DIVISION.z
DATA DIVISION.z
PROCEDURE DIVISION.z
WORKING-STORAGE SECTION.z
LINKAGE SECTION.z
FILE SECTION.z
INPUT-OUTPUT SECTION.z
OPEN z
CLOSE z
READ z
WRITE z
IF z
ELSE z
MOVE z	
PERFORM z
UNTIL z	
VARYING z
ACCEPT z	
DISPLAY z

STOP RUN.rG   rH   r    )
z
local rk   r\   r]   r^   z
repeat r   rG   rH   r    )z	
main :: z
main = rl   z
in rq   z
where 
:: z
= 
data z	
newtype rd   r|   z
module rr   z
qualified z
import qualified r[   z

instance r`   z
| r}   z
= {z
, r   rG   rH   r    )rk   z
param r\   rp   r]   r^   r_   r[   ry   r{   rz   r   rG   rH   r    z	Language z is not implemented yet!z& is not supported! Please choose from )r   CCPPGOJAVAKOTLINJSTSPHPPROTOPYTHONRSTRUBYELIXIRRUSTSCALASWIFTMARKDOWNLATEXHTMLCSHARPSOLCOBOLLUAHASKELL
POWERSHELL_value2member_map_
ValueErrorrC   )rW   r   r   r   rX      sn   



















$



&
z:RecursiveCharacterTextSplitter.get_separators_for_language)NTF)
rE   rF   r.   r/   r   r   r   r   r   r   )r   r   rE   r   r   r   r(   )rW   r   r   r   r   rD   )rW   r   r   r   )r)   r*   r+   r,   r   rP   r'   classmethodrZ   staticmethodrX   r-   r   r   r   r   rD   :   s    

(rD   )r   r   r   r   r.   r/   r   r   )
__future__r   r!   typingr   r   r   r   r   langchain_text_splitters.baser   r	   r
   r#   rD   r   r   r   r   <module>   s    
