# prompts.py


from typing import Any
from langchain.prompts import StringPromptTemplate,PromptTemplate
from pydantic import BaseModel, validator
from langchain.chains.router.llm_router import RouterOutputParser


# Earlier, shorter draft of the multi-element template (disabled). In English
# it read: "{knowledge} / Extract the information for the elements {elements}
# from the content above; output each element as "name: value", separated by
# blank lines."

# Template for extracting several contract elements in a single request.
#   {knowledge} - the contract text the model should read.
#   {elements}  - the names of the elements to extract.
# In English the instruction reads: "From the contract text provided above,
# extract the information related to the specified elements and output the
# results in the form "name: value", separated by blank lines, without using
# markdown formatting. For example: <two sample lines>. The elements you need
# to extract are: {elements}".
template="""
{knowledge}

请从上述提供的合同文本中提取出指定元素相关的信息，并以“名称：值”的格式输出识别结果，用空行分隔，不要使用 markdown 格式。例如：
测试元素1：test
测试元素2：test2

你需要提取的元素有：
{elements}
"""

# Prompt object built from the string above; ElementsPromptTemplate.format
# renders it with the `knowledge` and `elements` values.
prompt=PromptTemplate.from_template(template)


class ElementsPromptTemplate(StringPromptTemplate, BaseModel):
    """Prompt template that asks for several contract elements in one request.

    ``format`` renders the module-level ``prompt`` with the contract text and
    the requested element names, one name per line.
    """

    def format(self, **kwargs) -> str:
        """Render the multi-element extraction prompt.

        Args:
            **kwargs: Must contain ``knowledge`` (the text to extract from).
                May contain ``elements``: an iterable of element-name strings,
                or a single name given as a plain string. A missing or
                ``None`` value means "no elements".

        Returns:
            The formatted prompt string.

        Raises:
            ValueError: If ``knowledge`` is not supplied.
        """
        if "knowledge" not in kwargs:
            raise ValueError("knowledge is required")

        elements = kwargs.get("elements")
        if elements is None:
            # Use an empty sequence so the placeholder renders as an empty
            # string; passing a list object through to the template would put
            # the literal text "[]" into the prompt.
            elements = ()
        elif isinstance(elements, str):
            # A bare string is a single element name; joining it directly
            # would split it into one character per line.
            elements = (elements,)

        return prompt.format(
            knowledge=kwargs["knowledge"],
            elements="\n".join(elements),
        )

    def _prompt_type(self):
        """Return the identifier string of this prompt type."""
        return "contract-elements"


# Template for extracting a single contract element per request.
#   {knowledge} - the contract text the model should read.
#   {element}   - the name of the element; it appears twice, once in the
#                 instruction and once as the required output prefix.
# In English the instruction reads: "From the contract text provided above,
# extract the information for {element} and output it in the form
# "{element}:"."
template_foreach="""
{knowledge}

请从上述提供的合同文本中提取出 {element} 的信息，并以“{element}：”的形式进行输出。
"""

# Prompt object built from the string above; ElementPromptTemplate.format
# renders it with the `knowledge` and `element` values.
prompt_foreach=PromptTemplate.from_template(template_foreach)

class ElementPromptTemplate(StringPromptTemplate, BaseModel):
    """Prompt template that asks for exactly one contract element per request.

    ``format`` renders the module-level ``prompt_foreach`` with the contract
    text and the name of the element to extract.
    """

    def format(self, **kwargs) -> str:
        """Render the single-element extraction prompt.

        Args:
            **kwargs: Must contain ``knowledge`` (the text to extract from).
                May contain ``element`` (the element name); a missing or
                ``None`` value is rendered as an empty string.

        Returns:
            The formatted prompt string.

        Raises:
            ValueError: If ``knowledge`` is not supplied.
        """
        target = kwargs.get("element")
        if target is None:
            target = ""

        if "knowledge" not in kwargs:
            raise ValueError("knowledge is required")

        return prompt_foreach.format(knowledge=kwargs["knowledge"], element=target)

    def _prompt_type(self):
        """Return the identifier string of this prompt type."""
        return "contract-element"

#========================
#   documentqa chain prompt
#   documentqa chain prompt
#   documentqa chain prompt
#========================

#========================
#   GLM  refine
#========================
#------------------------
#   Summarisation
#------------------------
# First-pass summary prompt: the content to summarise goes into {text},
# fenced by dashed lines, and the prompt ends with a "summary:" cue.
summarise_prompt = (
    "对以下内容进行简要总结:\n"
    "----------\n"
    "{text}\n"
    "----------\n"
    ",总结："
)
# Follow-up prompt: {existing_answer} is the summary so far and {text} is the
# next piece of context; the model is told to return the original summary
# when the new context is not useful.
refine_summarise_prompt = (
    "你的工作是生成总结。\n"
    "我们已经提供了一段摘要:{existing_answer}\n"
    "我们需要从下面的上下文中补充和完善摘要，如果上下文没有用处，请返回原始摘要：\n"
    "----------\n"
    "{text}\n"
    "----------\n"
)
# Prompt object for the first summarisation pass; its only variable is `text`.
SUMMARISE_PROMPT = PromptTemplate(template=summarise_prompt, input_variables=["text"])

# Prompt object for the follow-up pass; takes the running summary
# (`existing_answer`) together with the next piece of `text`.
REFINE_SUMMARISE_PROMPT = PromptTemplate(
    template=refine_summarise_prompt,
    input_variables=["existing_answer", "text"],
)

#------------------------
#   Question answering
#------------------------
# First-pass question-answering prompt.
#   {question} - the user's question (quoted inside the instruction).
#   {context}  - the source material, fenced by dashed lines.
# In English: "Answer the question "{question}" based on the material below,
# using only the material. If the question is unrelated to the material,
# answer "对不起，我不知道" (sorry, I don't know), and do not give unrelated
# answers."
qa_prompt = """请根据下面的材料回答问题："{question}"，只根据材料内容进行回答，如果问题与提供的材料无关，请回答"对不起，我不知道"，另外也不要回答无关答案：
-------------------
{context}
-------------------"""
QA_PROMPT = PromptTemplate(template=qa_prompt, input_variables=["context", "question"])

# Follow-up question-answering prompt.
#   {existing_answer} - the answer produced so far ("Exist_answer").
#   {context}         - the next piece of source material.
#   {question}        - the user's question.
# In English: "Your job is to supplement Exist_answer based on the new
# Context. ... If the context is not useful, return the original answer; if
# the query is unrelated to the provided material, answer "对不起，我不知道"
# (sorry, I don't know), and do not give unrelated answers."
refine_qa_prompt = """你的工作是根据新的Context补充Exist_answer。
Exist_answer:
-------------------
{existing_answer}
-------------------
Context：
-------------------
{context}
-------------------
Question: 
-------------------
{question}
-------------------
如果上下文没有用处，请返回原始答案，如果query与提供的材料无关，请回答"对不起，我不知道"，另外也不要回答无关答案"""

REFINE_QA_PROMPT = PromptTemplate(
        input_variables=["existing_answer", "context","question"],
        template=refine_qa_prompt,
    )

#------------------------
#   Extraction
#------------------------

# First-pass contract-element extraction prompt.
#   {question} - the list of element names to extract, separated by "、".
#   {context}  - the source material; it is placed last, and the prompt tells
#                the model that everything from there to the end is material.
# In English: "Your job is contract element extraction: extract the contract
# elements from the provided material and do not fabricate answers. If the
# material is useless, reply "无" (none)." An example then shows the expected
# "element：value" answer lines.
extraction_prompt="""你的工作是合同要素提取，根据提供的资料提取合同要素，不要造假答案。如果资料没用，回复“无”。需要提取的要素列表（用“、”分割）：{question}。
示例：----------
要素列表：签订日期、甲方
回答格式如下：
签订日期：2020年1月1日
甲方：无
----------

下面一直到结束是提供的资料：
{context}"""

EXTRACTION_PROMPT = PromptTemplate(template=extraction_prompt, input_variables=["context", "question"])
# Earlier draft wording (disabled), in English: "Your job is, on the basis of
# the existing Exist_answer, to extract from the new Context the values of the
# keys listed in "keys", and to answer in "key：value" form, separated by
# blank lines."

# Follow-up contract-element extraction prompt.
#   {question}        - the list of element names, separated by "、".
#   {existing_answer} - the element information extracted so far.
#   {context}         - the next piece of source material (placed last).
# In English: "Your job is contract element extraction: improve the existing
# element information using the provided material. If the material is
# useless, return the existing element information; do not fabricate
# answers." The same example answer format as the first-pass prompt follows.
refine_extraction_prompt="""你的工作是合同要素提取，根据提供的资料完善现有的要素信息。如果资料没用，请返回现有的要素信息，不要造假答案。需要提取的要素列表（用“、”分割）：{question}。
示例：----------
要素列表：签订日期、甲方
回答格式如下：
签订日期：2020年1月1日
甲方：无
----------

现有的要素信息如下:
{existing_answer}
---------------------
下面一直到结束是提供的资料：
{context}"""


REFINE_EXTRACTION_PROMPT = PromptTemplate(
        input_variables=["existing_answer", "context","question"],
        template=refine_extraction_prompt,
    )

# First-pass prompt for extracting ONE element per request.
#   {question} - the element name; used both in the instruction and as the
#                required prefix of the answer.
#   {context}  - the source material.
# In English: "Your job is to extract the element {question} from the
# material. If the element cannot be extracted from the material, reply "无"
# (none). ... Note that the output must start with "{question}：", followed
# immediately by the answer."
foreach_extraction_prompt="""你的工作是从资料中提取要素：{question}。如果资料无法提取要素信息，回复“无”。

下面是提供的资料：
{context}

---------------------
请注意输出格式以“{question}：”开头，紧接着给出答案。"""

FOREACH_EXTRACTION_PROMPT = PromptTemplate(template=foreach_extraction_prompt, input_variables=["context", "question"])
# Earlier draft wording (disabled), in English: "Your job is, on the basis of
# the existing Exist_answer, to extract from the new Context the values of the
# keys listed in "keys", and to answer in "key：value" form, separated by
# blank lines."

# Follow-up prompt for extracting ONE element per request.
#   {question}        - the element name (instruction and answer prefix).
#   {existing_answer} - the answer produced so far.
#   {context}         - the next piece of source material.
# In English: "Your job is to extract the element {question} from the
# material. If the element information in the material is clear, improve the
# existing answer and return it. If the element cannot be extracted, return
# the existing answer. Do not give unrelated answers. ... Note that the output
# must start with "{question}：" and give the answer directly."
foreach_refine_extraction_prompt="""你的工作是从资料中提取要素：{question}。如果资料中要素信息明确，完善现有的答案并返回。如果资料无法提取要素信息，请返回现有的答案。另外也不要回答无关答案。

现有的答案:
{existing_answer}

---------------------

下面是提供的资料：
{context}

---------------------
请注意输出格式以“{question}：”开头，直接给出答案。"""


FOREACH_REFINE_EXTRACTION_PROMPT = PromptTemplate(
        input_variables=["existing_answer", "context","question"],
        template=foreach_refine_extraction_prompt,
    )


#========================
#   map reduce prompt
#   map reduce prompt
#   map reduce prompt
#========================

# Per-chunk prompt of the map-reduce section: pick out the relevant text.
#   {question} - the user's question.
#   {context}  - one piece of source material.
# In English: "Your job is to find the text in Context that is related to
# Question and return the original text. If Question is unrelated to the
# provided Context, answer "相关文本：无" (relevant text: none), and do not
# give unrelated answers. Note: start the reply with "相关文本：" (relevant
# text:) and do not include the Question itself."
glm_chat_question_prompt = """你的工作是找出Context中与Question相关的文本，返回原始文本。如果Question与提供的Context无关，请回答"相关文本：无"，另外也不要回答无关答案：
注意：回复以“相关文本：”开头，不要包含Question本身
Question：
-----------
{question}
-----------
Context：
-----------
{context}
-----------"""

GLM_CHAT_QUESTION_PROMPT = PromptTemplate(
    template=glm_chat_question_prompt,
    input_variables=["question","context"],
)

# Combine prompt of the map-reduce section: answer from the collected texts.
#   {question}  - the user's question.
#   {summaries} - the collected material; the prompt states that each text in
#                 it begins with "相关文本：" (relevant text:).
# In English: "Your job is to answer the question based on the material.
# Question: {question}; the material is provided below. If the material is
# unrelated to the question, reply "不知道" (don't know); do not add any
# unrelated content."
glm_chat_combine_prompt = """你的工作是根据资料回答问题。问题：{question}，下面是提供的资料。如果资料与问题无关，回复“不知道”，不要添加任何不相关的内容。
注意：每个文本以“相关文本：”开头
<< 资料 >>
{summaries}
"""

GLM_CHAT_COMBINE_PROMPT = PromptTemplate(
    template=glm_chat_combine_prompt,
    input_variables=["question","summaries"],
)

# Per-chunk contract-element extraction prompt of the map-reduce section.
#   {question} - the list of element names to extract, separated by "、".
#   {context}  - one piece of source material (placed last; the prompt says
#                everything from there to the end is material).
# In English: "Your job is contract element extraction: extract the contract
# elements from the provided material and do not fabricate answers. If the
# material is useless, reply "无" (none)." An example then shows the expected
# "element：value" answer lines.
glm_map_extraction_prompt="""你的工作是合同要素提取，根据提供的资料提取合同要素，不要造假答案。如果资料没用，回复“无”。需要提取的要素列表（用“、”分割）：{question}。
示例：----------
要素列表：签订日期、甲方
回答格式如下：
签订日期：2020年1月1日
甲方：无
----------

下面一直到结束是提供的资料：
{context}"""

GLM_MAP_EXTRACTION_PROMPT = PromptTemplate(
    template=glm_map_extraction_prompt,
    input_variables=["question","context"],
)

# Combine prompt for contract-element extraction in the map-reduce section.
#   {question}  - the list of element names to extract, separated by "、".
#   {summaries} - the collected material (placed last).
# In English: "Your job is to consolidate the element information found in
# the material according to the elements that need to be extracted." The same
# example answer format ("element：value" lines) follows.
glm_map_extraction_combine_prompt="""你的工作是将资料里面的要素信息根据需要提取的要素信息汇总。需要提取的要素列表（用“、”分割）：{question}。
示例：----------
要素列表：签订日期、甲方
回答格式如下：
签订日期：2020年1月1日
甲方：无
----------

下面一直到结束是提供的资料：
{summaries}"""

GLM_MAP_EXTRACTION_COMBINE_PROMPT = PromptTemplate(
    template=glm_map_extraction_combine_prompt,
    input_variables=["question","summaries"],
)

#===================================
#   foreach prompt
#===================================
# Per-chunk prompt of the "foreach" section: pick out text about one element.
#   {question} - the element name being looked for.
#   {context}  - one piece of source material.
# In English: "Your job is to find the text in the material related to
# "{question}" and return the original text. If no related text is found,
# answer "相关文本：无" (relevant text: none). Do not generate text unrelated
# to the material. ... Note that the output must start with "相关文本："
# (relevant text:)."
foreach_map_q_extraction_prompt = """你的工作是找出资料中与“{question}”相关的文本，返回原始文本。如果找不到相关文档，请回答"相关文本：无"。不要生成与资料不相关的文字：

下面是提供的资料：
{context}

---------------------
请注意输出格式以“相关文本：”开头。"""

FOREACH_MAP_Q_EXTRACTION_PROMPT = PromptTemplate(
    template=foreach_map_q_extraction_prompt,
    input_variables=["question","context"],
)

# Combine prompt of the "foreach" section: extract one element from the
# collected texts.
#   {question}  - the element name; used both in the instruction and as the
#                 required prefix of the answer.
#   {summaries} - the collected material.
# In English: "Your job is to extract the element {question} from the
# material. If the element cannot be extracted from the material, reply "无"
# (none). ... Note that the output must start with "{question}：", followed
# immediately by the answer."
foreach_map_extraction_prompt="""你的工作是从资料中提取要素：{question}。如果资料无法提取要素信息，回复“无”。

下面是提供的资料：
{summaries}

---------------------
请注意输出格式以“{question}：”开头，紧接着给出答案。"""


FOREACH_MAP_EXTRACTION_PROMPT = PromptTemplate(
        input_variables=["summaries","question"],
        template=foreach_map_extraction_prompt,
    )

#========================
#   Baidu ERNIE
#========================
# Per-chunk prompt: find the paragraphs related to the question.
#   {question} - the user's question.
#   {context}  - one piece of source material, wrapped in ''' markers.
# In English: "Find the paragraphs related to the question within the
# material inside '''; start the answer with "相关资料：" (relevant material:).
# ... Note: if no related paragraph is found, answer "无" (none)."
ernie_chat_question_prompt = """现在需要你在'''中的资料中找出与问题相关的段落，回答用“相关资料：”开头。
问题是：{question}。
'''
{context}
'''
注意：如果没有找到与问题相关的段落就回答“无”。"""
# Combine prompt: answer the question from the collected material.
#   {question}  - the user's question.
#   {summaries} - the collected material, wrapped in ''' markers.
# In English: "Answer the question based on the material inside '''; if the
# material is unrelated to the question, reply "不知道" (don't know); do not
# answer with any unrelated content."
ernie_chat_combine_prompt = """现在需要你根据'''中的资料回答问题，如果资料与问题无关就回复“不知道”，不要回答任何不相关的内容。
问题是：{question}。
'''
{summaries}
'''"""
ERNIE_GLM_CHAT_QUESTION_PROMPT = PromptTemplate(
    template=ernie_chat_question_prompt,
    input_variables=["question","context"],
)
ERNIE_GLM_CHAT_COMBINE_PROMPT = PromptTemplate(
    template=ernie_chat_combine_prompt,
    input_variables=["question","summaries"],
)

# Summarisation prompt exposed as MAP_REDUCE_SUMMARISE_PROMPT.
#   {text} - the content to summarise, placed inside double quotes.
# In English: "Please give a brief summary of the following content:".
# NOTE(review): the variable name is spelt "reducce"; it is left unchanged
# here because other modules may import it by this name.
map_reducce_prompt = """请对下面内容做一个简要总结:

"{text}"

"""
MAP_REDUCE_SUMMARISE_PROMPT = PromptTemplate(template=map_reducce_prompt, input_variables=["text"])

# First-pass extraction prompt that requests JSON output.
#   {question} - the list of items to extract; it appears twice, once in the
#                instruction and once in the output requirement.
#   {context}  - the source material, wrapped in ''' markers.
# In English: "Extract the important information from the material wrapped
# in ''' as required; the list of information to extract is {question}. If
# something cannot be extracted, write "未知" (unknown). ... Output
# requirement: output JSON whose keys must include the items "{question}";
# add nothing besides the JSON."
# The doubled braces {{ and }} are escapes that render as literal { and } in
# the sample JSON block when the template is formatted.
ernie_extraction_prompt="""请从'''包裹的资料中按要求抽取出重要信息，需要你提取的信息列表：{question}。若无法提取相关信息，用"未知"表示。
资料如下：
'''
{context}
'''

输出要求：
以json格式输出，输出json中key必须含有\"{question}\"几项。除json以外不要添加任何内容。
```json
{{
    "XXX":"XXXXXX",
    "XXX":"未知"
}}
```
"""

ERNIE_EXTRACTION_PROMPT = PromptTemplate(template=ernie_extraction_prompt, input_variables=["context", "question"])
# Earlier draft wording (disabled), in English: "Your job is, on the basis of
# the existing Exist_answer, to extract from the new Context the values of the
# keys listed in "keys", and to answer in "key：value" form, separated by
# blank lines."

# Follow-up extraction prompt that requests JSON output.
#   {existing_answer} - the previously extracted information, wrapped in
#                       triple double quotes (written as \"\"\" in the source).
#   {context}         - the next piece of source material, wrapped in '''.
#   {question}        - the items whose keys the output JSON must include.
# In English: "Because the original text is too long, the key information has
# to be extracted piece by piece. The previously extracted information is
# wrapped in triple double quotes; using the new material wrapped in ''',
# fill in the parts of it that are "未知" (unknown) and keep the parts that
# are already known. ... Output requirement: if the new material does not
# help, return the previously extracted information. Output JSON whose keys
# must include the items "{question}"; add nothing besides the JSON."
# The doubled braces {{ and }} are escapes that render as literal { and } in
# the sample JSON block when the template is formatted.
ernie_refine_extraction_prompt="""由于原始文本太长，关键信息需要分段提取。\"\"\"包裹的是历史提取的信息，请根据'''包裹的新资料补充历史提取的信息中\"未知\"的部分，历史提取的信息中已知部分请保留输出。
历史提取的信息：
\"\"\"{existing_answer}\"\"\"

资料如下：
'''
{context}
'''

输出要求：
如果新资料没有帮助，请返回\"\"\"包裹的历史提取的信息。
以json格式输出，输出json中key必须含有\"{question}\"几项。除json以外不要添加任何内容。
```json
{{
    "XXX":"XXXXXX",
    "XXX":"未知"
}}
```
"""


ERNIE_REFINE_EXTRACTION_PROMPT = PromptTemplate(
        input_variables=["existing_answer", "context","question"],
        template=ernie_refine_extraction_prompt,
    )


#========================
#   route chain prompt
#   route chain prompt
#   route chain prompt
#========================
# Router instruction text. It is formatted twice: first with str.format()
# further down in this module (which fills {destinations}), and then as the
# template of ROUTER_PROMPT (whose variable is `input`). The brace escaping
# below is written for those two passes:
#   {{{{ / }}}}  -> {{ / }} after the first pass -> literal { / } in the end
#   {{input}}    -> {input} after the first pass -> the prompt variable
# A trailing backslash joins a line with the next one, and the backslash right
# after the opening quotes suppresses the leading newline. Each "\\" in the
# source is a single backslash in the resulting text.
MULTI_PROMPT_ROUTER_TEMPLATE2 = """\
Given a raw text input to a language model select the model prompt best suited for \
the input. You will be given the names of the available prompts and a description of \
what the prompt is best suited for. You may also revise the original input if you \
think that revising it will ultimately lead to a better response from the language \
model.

<< FORMATTING >>
Return a markdown code snippet with a JSON object formatted to look like:
{{{{
    "destination": string \\ name of the prompt to use or "default"
    "next_inputs": string \\ a potentially modified version of the original input
}}}}

REMEMBER: "destination" MUST be one of the candidate prompt names specified below OR \
it can be "default" if the input is not well suited for any of the candidate prompts.
REMEMBER: "next_inputs" can just be the original input if you don't think any \
modifications are needed.

<< CANDIDATE PROMPTS >>
{destinations}

<< INPUT >>
{{input}}

<< OUTPUT (OUTPUT must be json string and don't include Note) >>
"""

# Candidate destinations offered to the router: each entry has a `name` and a
# `description` (in Chinese) of what that destination is good at.
prompt_infos = [
    dict(name="summer", description="文档总结和摘要很专业"),
    dict(name="expertor", description="基于输入的文档，回答文档中相关的问题很专业"),
    dict(name="extractor", description="从文档中提取关键信息和事实很专业"),
]

# One "name: description" line per candidate.
destinations = [
    ": ".join((info["name"], info["description"]))
    for info in prompt_infos
]

# All candidate lines as a single newline-separated string.
destinations_str = "\n".join(destinations)
# First formatting pass: insert the candidate list. This also collapses the
# doubled braces in the template, leaving {input} as the only placeholder.
router_template = MULTI_PROMPT_ROUTER_TEMPLATE2.format(
    destinations=destinations_str,
)

# Router prompt: takes the raw `input` and parses the model's reply with
# RouterOutputParser.
ROUTER_PROMPT = PromptTemplate(
    output_parser=RouterOutputParser(),
    input_variables=["input"],
    template=router_template,
)