Commit a3e8fb77 by 文靖昊

模糊搜索文档和向量相似文档去重,多agent实现

parent b904d140
...@@ -40,6 +40,46 @@ Action: ...@@ -40,6 +40,46 @@ Action:
开始!始终以有效的单个操作的 JSON 对象回复。如有必要,请使用工具。如果你知道答案,请直接回复。格式为 Action:```$JSON_BLOB```然后 Observation 开始!始终以有效的单个操作的 JSON 对象回复。如有必要,请使用工具。如果你知道答案,请直接回复。格式为 Action:```$JSON_BLOB```然后 Observation
""" """
# System prompt for the question-expansion agent (ReAct / structured-chat
# format). It instructs the model to EXPAND the human's question using the
# available tools rather than answer it, replying with exactly one JSON
# action blob ({"action": ..., "action_input": ...}) per step and finishing
# with a "Final Answer" action carrying the expanded question.
# Template variables: {tools} — tool descriptions; {tool_names} — the valid
# "action" values. Literal braces in the JSON examples are escaped as {{ }}.
# NOTE: the template text is runtime data sent to the LLM (intentionally in
# Chinese) — do not translate or reflow it.
PROMPT_AGENT_EXTEND_SYS = """请帮助人类扩展问题,不要回答问题,当人类询问你的时候,请利用工具将问题扩展,并将扩展后的问题返回给人类。您可以使用以下工具:
{tools}
使用 JSON 对象指定工具,提供一个 action 键(工具名称)和一个 action_input 键(工具输入)。
有效的 "action" 值: "Final Answer" 或 {tool_names}
每个 $JSON_BLOB 只提供一个操作,如下所示:
```
{{
"action": $TOOL_NAME,
"action_input": $INPUT
}}
```
按照以下格式:
Question: 输入要扩展的问题
Thought: 考虑前后步骤
Action:
```
$JSON_BLOB
```
Observation: 操作结果
...(重复 Thought/Action/Observation N 次)
Thought: 我知道如何扩展问题
Action:
```
{{
"action": "Final Answer",
"action_input": "最终扩展问题给人类"
}}
```
开始!始终以有效的单个操作的 JSON 对象回复。如有必要,请使用工具。格式为 Action:```$JSON_BLOB```然后 Observation
"""
PROMPT_AGENT_CHART_SYS = """请尽量帮助人类并准确回答问题。您可以使用以下工具: PROMPT_AGENT_CHART_SYS = """请尽量帮助人类并准确回答问题。您可以使用以下工具:
{tools} {tools}
......
...@@ -103,6 +103,14 @@ class GetSimilarityWithExt: ...@@ -103,6 +103,14 @@ class GetSimilarityWithExt:
result.extend(rerank_docs2[:3]) result.extend(rerank_docs2[:3])
end = time.time() end = time.time()
print('重排2 time: %s Seconds' % (end - start)) print('重排2 time: %s Seconds' % (end - start))
print(len(result))  # debug: count of reranked docs before de-duplication
# De-duplicate documents coming from the two rerank passes: keep the first
# occurrence of each distinct page_content (order-preserving). Hashing the
# content avoids keeping full strings in the seen-set.
# NOTE(review): diff extraction lost the original indentation of this method
# body — this fragment sits inside a method of GetSimilarityWithExt.
content_set = set()
unique_documents = []
for doc in result:
content = hash(doc.page_content)
if content not in content_set:
unique_documents.append(doc)
content_set.add(content)
# for doc in rerank_docs2: # for doc in rerank_docs2:
# m[hash(doc.page_content)] = doc # m[hash(doc.page_content)] = doc
# rerank_docs2_hash.append(hash(doc.page_content)) # rerank_docs2_hash.append(hash(doc.page_content))
...@@ -119,8 +127,9 @@ class GetSimilarityWithExt: ...@@ -119,8 +127,9 @@ class GetSimilarityWithExt:
# for key in rrf_doc: # for key in rrf_doc:
# d_list.append(m[key]) # d_list.append(m[key])
# print("返回文档数量:",top_k) # print("返回文档数量:",top_k)
self.rerank_docs = result print(len(unique_documents))
return self.join_document(result) self.rerank_docs = unique_documents
return self.join_document(unique_documents)
def get_similarity_doc(self): def get_similarity_doc(self):
return self.similarity_doc_txt return self.similarity_doc_txt
......
...@@ -33,7 +33,7 @@ from src.config.consts import ( ...@@ -33,7 +33,7 @@ from src.config.consts import (
prompt1 prompt1
) )
from src.config.prompts import PROMPT_AGENT_SYS_VARS,PROMPT_AGENT_SYS,PROMPT_AGENT_CHAT_HUMAN,PROMPT_AGENT_CHAT_HUMAN_VARS from src.config.prompts import PROMPT_AGENT_SYS_VARS,PROMPT_AGENT_SYS,PROMPT_AGENT_CHAT_HUMAN,PROMPT_AGENT_CHAT_HUMAN_VARS,PROMPT_AGENT_EXTEND_SYS
base_llm = ChatOpenAI( base_llm = ChatOpenAI(
openai_api_key='xxxxxxxxxxxxx', openai_api_key='xxxxxxxxxxxxx',
...@@ -60,8 +60,8 @@ k_db.connect() ...@@ -60,8 +60,8 @@ k_db.connect()
llm_chain = LLMChain(llm=base_llm, prompt=PromptTemplate(input_variables=["history","context", "question"], template=prompt1), llm_kwargs= {"temperature": 0}) llm_chain = LLMChain(llm=base_llm, prompt=PromptTemplate(input_variables=["history","context", "question"], template=prompt1), llm_kwargs= {"temperature": 0})
tool_rag = RAGQuery(vecstore_faiss,ext,PromptTemplate(input_variables=["history","context", "question"], template=prompt_enhancement_history_template),_db=TxtDoc(k_db),_llm_chain=llm_chain) # tool_rag = RAGQuery(vecstore_faiss,ext,PromptTemplate(input_variables=["history","context", "question"], template=prompt_enhancement_history_template),_db=TxtDoc(k_db),_llm_chain=llm_chain)
tools = [AdministrativeDivision(),RAGQuery(vecstore_faiss,ext,PromptTemplate(input_variables=["history","context", "question"], template=prompt_enhancement_history_template),_db=TxtDoc(k_db),_llm_chain=llm_chain)] tools = [RAGQuery(vecstore_faiss,ext,PromptTemplate(input_variables=["history","context", "question"], template=prompt_enhancement_history_template),_db=TxtDoc(k_db),_llm_chain=llm_chain)]
# input_variables=['agent_scratchpad', 'input', 'tool_names', 'tools','chart_tool'] # input_variables=['agent_scratchpad', 'input', 'tool_names', 'tools','chart_tool']
input_variables=[] input_variables=[]
input_variables.extend(PROMPT_AGENT_CHAT_HUMAN_VARS) input_variables.extend(PROMPT_AGENT_CHAT_HUMAN_VARS)
...@@ -75,6 +75,15 @@ messages=[ ...@@ -75,6 +75,15 @@ messages=[
HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=PROMPT_AGENT_CHAT_HUMAN_VARS, template=PROMPT_AGENT_CHAT_HUMAN)) HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=PROMPT_AGENT_CHAT_HUMAN_VARS, template=PROMPT_AGENT_CHAT_HUMAN))
] ]
# Message stack for the administrative-division agent. It reuses the shared
# human template but swaps the system prompt for PROMPT_AGENT_EXTEND_SYS, so
# this agent expands the question (via AdministrativeDivision) instead of
# answering it. Chat history is optional.
administrative_messages=[
# (earlier hard-coded English structured-chat system template, kept for reference)
# SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['tool_names', 'tools'], template='Respond to the human as helpfully and accurately as possible. You have access to the following tools:\n\n{tools}\n\nUse a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).\n\nValid "action" values: "Final Answer" or {tool_names}\n\nProvide only ONE action per $JSON_BLOB, as shown:\n\n```\n{{\n "action": $TOOL_NAME,\n "action_input": $INPUT\n}}\n```\n\nFollow this format:\n\nQuestion: input question to answer\nThought: consider previous and subsequent steps\nAction:\n```\n$JSON_BLOB\n```\nObservation: action result\n... (repeat Thought/Action/Observation N times)\nThought: I know what to respond\nAction:\n```\n{{\n "action": "Final Answer",\n "action_input": "Final response to human"\n}}\n\nBegin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation')),
SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=PROMPT_AGENT_SYS_VARS, template=PROMPT_AGENT_EXTEND_SYS)),
MessagesPlaceholder(variable_name='chat_history', optional=True),
# HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['agent_scratchpad', 'input'], template='{input}\n\n{agent_scratchpad}\n (reminder to respond in a JSON blob no matter what)'))
HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=PROMPT_AGENT_CHAT_HUMAN_VARS, template=PROMPT_AGENT_CHAT_HUMAN))
]
prompt = ChatPromptTemplate( prompt = ChatPromptTemplate(
input_variables=input_variables, input_variables=input_variables,
input_types=input_types, input_types=input_types,
...@@ -83,8 +92,22 @@ prompt = ChatPromptTemplate( ...@@ -83,8 +92,22 @@ prompt = ChatPromptTemplate(
) )
# Chat prompt for the administrative-division agent, built from
# administrative_messages. Reuses the same input_variables/input_types as the
# main agent prompt — presumably both prompts share the same template
# variables; verify if PROMPT_AGENT_EXTEND_SYS ever diverges from them.
administrative_prompt = ChatPromptTemplate(
input_variables=input_variables,
input_types=input_types,
# metadata=metadata,
messages=administrative_messages
)
AdministrativeTools =[AdministrativeDivision()]
# agent = create_structured_chat_agent(llm=base_llm, tools=tools, prompt=prompt) # agent = create_structured_chat_agent(llm=base_llm, tools=tools, prompt=prompt)
agent = create_chart_agent(base_llm, tools, prompt, chart_tool="chart") agent = create_chart_agent(base_llm, tools, prompt, chart_tool="chart")
# Second agent dedicated to administrative-division question expansion; same
# construction path as the main agent (create_chart_agent + AgentExecutor),
# with parsing-error recovery and intermediate steps returned for inspection.
administrative_agent = create_chart_agent(base_llm, AdministrativeTools, administrative_prompt, chart_tool="chart")
administrative_agent_executor = AgentExecutor(agent=administrative_agent, tools=AdministrativeTools,verbose=True,handle_parsing_errors=True,return_intermediate_steps=True)
agent_executor = AgentExecutor(agent=agent, tools=tools,verbose=True,handle_parsing_errors=True,return_intermediate_steps=True) agent_executor = AgentExecutor(agent=agent, tools=tools,verbose=True,handle_parsing_errors=True,return_intermediate_steps=True)
history = [] history = []
h1 = [] h1 = []
...@@ -95,26 +118,18 @@ h1 = [] ...@@ -95,26 +118,18 @@ h1 = []
h1.append("长沙县年降雨量") h1.append("长沙县年降雨量")
h1.append("长沙县年雨量平均为50ml") h1.append("长沙县年雨量平均为50ml")
history.append(h1) history.append(h1)
prompt = ""
for h in history:
prompt += "问:{}\n答:{}\n".format(h[0], h[1])
print(prompt)
# res = agent_executor.invoke({"input":"以下历史对话记录: "+prompt+"以下是问题:"+"攸县、长沙县、化隆县和大通县谁的年平均降雨量大"}) # res = agent_executor.invoke({"input":"以下历史对话记录: "+prompt+"以下是问题:"+"攸县、长沙县、化隆县和大通县谁的年平均降雨量大"})
res = agent_executor.invoke({"input":"西宁市各区县年平均降雨量","histories":history}) res_a = administrative_agent_executor.invoke({"input":"西宁市各区县谁的年平均降雨量大","histories":history})
print(res_a)
print("====== result: ======")
print(res) # res = agent_executor.invoke({"input":"西宁市各区县年平均降雨量","histories":history})
print(type(res)) #
print(res["output"]) # print(res)
docs_json = [] # docs_json = []
for step in res["intermediate_steps"]: # for step in res["intermediate_steps"]:
print(type(step[0].tool)) # if "rag_query" ==step[0].tool:
print(step[0].tool) # j = json.loads(step[1]["参考文档"], strict=False)
if "rag_query" ==step[0].tool: # docs_json.extend(j)
print(True) # print(docs_json)
print(step[1]["参考文档"])
j = json.loads(step[1]["参考文档"], strict=False)
docs_json.extend(j)
print(docs_json)
print(len(docs_json))
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment