Commit caf42399 by 文靖昊

文档去重

parent e85ee1bd
......@@ -176,6 +176,7 @@ def question(chat_request: ChatRequest, token: str = Header(None)):
j ={}
j["page_content"] = d.page_content
j["from_file"] = d.metadata["filename"]
j["page_number"] = 0
docs_json.append(j)
# answer = "test Answer"
if session_id =="":
......@@ -220,6 +221,7 @@ def re_generate(chat_request: ReGenerateRequest, token: str = Header(None)):
j = {}
j["page_content"] = d.page_content
j["from_file"] = d.metadata["filename"]
j["page_number"] = 0
docs_json.append(j)
# answer = "reGenerate Answer"
......
......@@ -62,10 +62,16 @@ class GetSimilarityWithExt:
def get_text_similarity_with_ext(self):
    """Collect similarity hits for every query and drop duplicate documents.

    Each entry of ``self.question`` is passed to
    ``self.faiss_db.get_text_similarity``; the results are concatenated in
    query order and then de-duplicated on ``page_content``, keeping the
    first occurrence of each distinct text.

    Returns:
        list: the unique documents, in first-seen order. Empty when
        ``self.question`` is empty or no query produced a hit.
    """
    similarity_docs = []
    for q in self.question:
        print(q)
        # presumably returns an iterable of document objects exposing
        # ``page_content`` -- confirm against the faiss_db wrapper
        similarity_docs.extend(self.faiss_db.get_text_similarity(q))

    # De-duplicate on the text itself rather than on hash(text): two
    # different texts may share a hash value, which would silently discard
    # a distinct document. The set still gives O(1) membership checks.
    seen_contents = set()
    unique_documents = []
    for doc in similarity_docs:
        content = doc.page_content
        if content in seen_contents:
            continue
        seen_contents.add(content)
        unique_documents.append(doc)
    return unique_documents
DEFAULT_PROMPT = """作为一个向量检索助手,你的任务是结合历史记录,从不同角度,为“原问题”生成个不同版本的“检索词”,从而提高向量检索的语义丰富度,提高向量检索的精度。生成的问题要求指向对象清晰明确,并与“原问题语言相同”。例如:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment